savetrees committed on
Commit
48b0373
·
verified ·
1 Parent(s): 766521e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -1
  2. inference.py +116 -68
Dockerfile CHANGED
@@ -12,8 +12,12 @@ WORKDIR /app
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir -r requirements.txt
14
 
15
- # Copy package source
16
  COPY bug_triage_env/ ./bug_triage_env/
 
 
 
 
17
 
18
  # Expose OpenEnv standard port
19
  EXPOSE 8000
 
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir -r requirements.txt
14
 
15
+ # Copy all required files
16
  COPY bug_triage_env/ ./bug_triage_env/
17
+ COPY openenv.yaml .
18
+ COPY inference.py .
19
+ COPY README.md .
20
+ COPY pyproject.toml .
21
 
22
  # Expose OpenEnv standard port
23
  EXPOSE 8000
inference.py CHANGED
@@ -1,10 +1,12 @@
1
  #!/usr/bin/env python3
2
  """
3
- MANDATORY OpenEnv Interface Script for Bug Triage.
4
- Complies strictly with Hackathon evaluation requirements:
5
- - Uses OpenAI Client exclusively.
6
- - Emits [START], [STEP], and [END] structured stdout correctly.
7
- - Uses API_BASE_URL, MODEL_NAME, and HF_TOKEN.
 
 
8
  """
9
 
10
  import os
@@ -15,15 +17,18 @@ import requests
15
  from typing import List, Optional
16
  from openai import OpenAI
17
 
18
- # Required Hackathon Variables
19
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
20
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
21
- HF_TOKEN = os.getenv("HF_TOKEN", os.getenv("OPENAI_API_KEY", ""))
22
 
23
  ENV_URL = os.getenv("BUG_TRIAGE_ENV_URL", "http://localhost:8000")
24
  BENCHMARK = "bug_triage_openenv"
25
  TASKS = ["task_1", "task_2", "task_3"]
 
 
26
 
 
27
  SYSTEM_PROMPT = (
28
  "You are a senior software engineer performing bug triage.\n"
29
  "You will receive a bug report and must respond with a JSON object.\n"
@@ -31,7 +36,7 @@ SYSTEM_PROMPT = (
31
  "Available priorities: low, medium, high, critical\n"
32
  "Available developers: Alice, Bob, Carol, David, Eve\n"
33
  "Available actions: fix_immediately, schedule_sprint, needs_more_info, wontfix, duplicate\n"
34
- "IMPORTANT: Respond with ONLY valid JSON."
35
  )
36
 
37
  TASK_PROMPTS = {
@@ -40,126 +45,169 @@ TASK_PROMPTS = {
40
  "task_3": 'Respond ONLY with JSON: {"task_id": "task_3", "bug_type": "<type>", "priority": "<priority>", "assigned_developer": "<dev>", "suggested_action": "<action>"}',
41
  }
42
 
 
 
 
 
 
 
 
 
 
43
 
44
- # --- Strictly Defined Evaluation Logging Methods ---
 
45
 
46
- def log_start(task: str, env_name: str, model: str) -> None:
47
- print(f"[START] task={task} env={env_name} model={model}", flush=True)
48
 
49
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None) -> None:
50
  print(f"[STEP] step={step} action={action!r} reward={reward:.3f} done={str(done).lower()} error={error or ''}", flush=True)
51
 
 
52
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
53
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
54
  print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
55
 
56
- # ---------------------------------------------------
57
 
 
58
 
59
  def build_user_prompt(bug_report: dict, task_id: str) -> str:
60
- return textwrap.dedent(
61
- f"""
62
- Bug: {bug_report.get('title')}
63
- Desc: {bug_report.get('description')}
64
  Logs: {bug_report.get('logs', 'N/A')}
 
 
65
  {TASK_PROMPTS[task_id]}
66
- """
67
- ).strip()
68
 
69
  def get_model_action(client: OpenAI, user_prompt: str, task_id: str) -> str:
 
70
  try:
71
- completion = client.chat.completions.create(
72
- model=MODEL_NAME,
73
- messages=[
74
- {"role": "system", "content": SYSTEM_PROMPT},
75
- {"role": "user", "content": user_prompt},
76
- ],
77
- temperature=0.2,
78
- response_format={"type": "json_object"},
79
- stream=False,
80
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  text = (completion.choices[0].message.content or "").strip()
82
-
83
- # Ensure it's valid JSON, else fallback
 
 
 
 
 
 
84
  parsed = json.loads(text)
85
  parsed["task_id"] = task_id
86
  return json.dumps(parsed)
 
87
  except Exception as exc:
88
  print(f"[DEBUG] Model request failed: {exc}", flush=True)
89
- # Safe fallback based on task
90
- fallback_schemas = {
91
- "task_1": {"task_id": "task_1", "bug_type": "crash"},
92
- "task_2": {"task_id": "task_2", "priority": "medium"},
93
- "task_3": {"task_id": "task_3", "bug_type": "crash", "priority": "medium", "assigned_developer": "Alice", "suggested_action": "fix_immediately"},
94
- }
95
- return json.dumps(fallback_schemas[task_id])
96
-
97
- def main():
98
- if not HF_TOKEN:
99
- print("ERROR: HF_TOKEN environment variable is not set.", file=sys.stderr)
100
  sys.exit(1)
101
 
102
- client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
103
 
104
- # Sanity check URL
105
  try:
106
- requests.get(f"{ENV_URL}/health", timeout=5).raise_for_status()
 
107
  except Exception as e:
108
- print(f"[DEBUG] Failed to connect to server: {e}", flush=True)
109
  sys.exit(1)
110
 
111
  overall_scores = []
112
-
113
- # We loop each task. In Bug Triage, it's a 1-step episode environment natively.
114
  for task_name in TASKS:
115
- log_start(task=task_name, env_name=BENCHMARK, model=MODEL_NAME)
116
-
117
  rewards: List[float] = []
118
  steps_taken = 0
119
  score = 0.0
120
  success = False
121
-
122
  try:
123
  # reset()
124
- reset_resp = requests.post(f"{ENV_URL}/reset", json={"task_id": task_name}, timeout=30)
 
 
 
 
125
  reset_resp.raise_for_status()
126
  obs = reset_resp.json()
127
  episode_id = obs["episode_id"]
128
-
129
- # Formulate action
130
  user_prompt = build_user_prompt(obs.get("bug_report", {}), task_name)
131
  action_json_str = get_model_action(client, user_prompt, task_name)
132
-
133
- # step() - Since bug triage is a 1-step env, step == 1
134
  step = 1
135
  action_payload = json.loads(action_json_str)
136
-
137
- step_resp = requests.post(f"{ENV_URL}/step", json={"episode_id": episode_id, "action": action_payload}, timeout=30)
 
 
 
 
138
  step_resp.raise_for_status()
139
  step_data = step_resp.json()
140
-
141
  reward = step_data.get("reward", 0.0)
142
  done = step_data.get("done", True)
143
- error = None # No python exception from HTTP
144
  grader_score = step_data.get("grader_score", 0.0)
145
-
146
  rewards.append(reward)
147
  steps_taken = step
148
-
149
- log_step(step=step, action=action_json_str, reward=reward, done=done, error=error)
150
-
151
- # Grade constraints
152
- score = grader_score
153
- success = score >= 0.5 # Pass condition
154
 
155
  except Exception as e:
156
- print(f"[DEBUG] Environment interaction error: {e}", flush=True)
157
  success = False
158
  score = 0.0
159
-
160
  finally:
161
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
162
  overall_scores.append(score)
163
 
 
 
 
 
 
164
  if __name__ == "__main__":
165
  main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ Hackathon Inference Script for Bug Triage OpenEnv.
4
+
5
+ MANDATORY REQUIREMENTS:
6
+ - Uses OpenAI Client exclusively for all LLM calls.
7
+ - Reads API_BASE_URL, MODEL_NAME, and HF_TOKEN from environment.
8
+ - Emits structured [START], [STEP], and [END] logs to stdout.
9
+ - Completes in under 20 minutes on 2 vCPU / 8 GB RAM.
10
  """
11
 
12
  import os
 
17
  from typing import List, Optional
18
  from openai import OpenAI
19
 
20
+ # ----- Required Hackathon Variables -----
21
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
22
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
23
+ API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
24
 
25
  ENV_URL = os.getenv("BUG_TRIAGE_ENV_URL", "http://localhost:8000")
26
  BENCHMARK = "bug_triage_openenv"
27
  TASKS = ["task_1", "task_2", "task_3"]
28
+ TEMPERATURE = 0.2
29
+ MAX_TOKENS = 300
30
 
31
+ # ----- System Prompt -----
32
  SYSTEM_PROMPT = (
33
  "You are a senior software engineer performing bug triage.\n"
34
  "You will receive a bug report and must respond with a JSON object.\n"
 
36
  "Available priorities: low, medium, high, critical\n"
37
  "Available developers: Alice, Bob, Carol, David, Eve\n"
38
  "Available actions: fix_immediately, schedule_sprint, needs_more_info, wontfix, duplicate\n"
39
+ "IMPORTANT: Respond with ONLY a valid JSON object. No markdown, no explanation, no extra text."
40
  )
41
 
42
  TASK_PROMPTS = {
 
45
  "task_3": 'Respond ONLY with JSON: {"task_id": "task_3", "bug_type": "<type>", "priority": "<priority>", "assigned_developer": "<dev>", "suggested_action": "<action>"}',
46
  }
47
 
48
# Deterministic, schema-valid default actions, one per task.
# Returned by get_model_action when the LLM call or JSON parsing fails,
# so every episode still submits a well-formed payload to the environment.
FALLBACK_ACTIONS = {
    "task_1": {"task_id": "task_1", "bug_type": "crash"},
    "task_2": {"task_id": "task_2", "priority": "medium"},
    "task_3": {"task_id": "task_3", "bug_type": "crash", "priority": "medium",
               "assigned_developer": "Alice", "suggested_action": "fix_immediately"},
}
54
+
55
+
56
+ # ----- Structured Logging (matches sample inference.py exactly) -----
57
 
58
def log_start(task: str, env: str, model: str) -> None:
    """Emit the structured [START] marker for one evaluation episode."""
    header = f"[START] task={task} env={env} model={model}"
    print(header, flush=True)
60
 
 
 
61
 
62
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None) -> None:
    """Emit one structured [STEP] line describing a single environment step."""
    done_str = str(done).lower()
    err_str = error or ""
    line = f"[STEP] step={step} action={action!r} reward={reward:.3f} done={done_str} error={err_str}"
    print(line, flush=True)
64
 
65
+
66
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the final structured [END] summary line for an episode."""
    formatted_rewards = ",".join(f"{r:.2f}" for r in rewards)
    summary = (
        f"[END] success={str(success).lower()} steps={steps} "
        f"score={score:.3f} rewards={formatted_rewards}"
    )
    print(summary, flush=True)
69
 
 
70
 
71
+ # ----- LLM Interaction -----
72
 
73
def build_user_prompt(bug_report: dict, task_id: str) -> str:
    """Render one bug report plus the task-specific instruction as the user prompt.

    Missing report fields default to the literal string 'N/A'.
    """
    field = bug_report.get
    sections = [
        f"Bug Title: {field('title', 'N/A')}",
        f"Description: {field('description', 'N/A')}",
        f"Logs: {field('logs', 'N/A')}",
        f"Environment: {field('environment', 'N/A')}",
        "",
        TASK_PROMPTS[task_id],
    ]
    return "\n".join(sections)
82
+
83
 
84
def get_model_action(client: OpenAI, user_prompt: str, task_id: str) -> str:
    """Call the LLM and return a JSON action string for *task_id*.

    Tries a structured-output request first, retries without
    ``response_format`` for models that reject it, sanitizes markdown
    fences from the reply, and falls back to FALLBACK_ACTIONS on any
    failure so the caller always receives valid JSON.
    """
    try:
        # Shared parameters for both attempts — a single source of truth so
        # the two call paths cannot drift apart.
        request_kwargs = {
            "model": MODEL_NAME,
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": TEMPERATURE,
            "max_tokens": MAX_TOKENS,
            "stream": False,
        }
        try:
            # Prefer structured JSON output (works with OpenAI and some HF models).
            completion = client.chat.completions.create(
                response_format={"type": "json_object"}, **request_kwargs
            )
        except Exception:
            # Some models do not support response_format — retry without it.
            completion = client.chat.completions.create(**request_kwargs)

        text = (completion.choices[0].message.content or "").strip()

        # Strip markdown code fences if present (e.g. ```json ... ```).
        if text.startswith("```"):
            text = text.split("\n", 1)[-1]
        if text.endswith("```"):
            text = text.rsplit("```", 1)[0]
        text = text.strip()

        parsed = json.loads(text)
        # Enforce the correct task id regardless of what the model emitted.
        parsed["task_id"] = task_id
        return json.dumps(parsed)

    except Exception as exc:
        print(f"[DEBUG] Model request failed: {exc}", flush=True)
        return json.dumps(FALLBACK_ACTIONS[task_id])
129
+
130
+
131
+ # ----- Main Entry Point -----
132
+
133
def main() -> None:
    """Run the three bug-triage tasks end to end and print structured logs.

    Exits with status 1 if the API key is missing or the environment
    server is unreachable. Each task is a single-step episode: reset,
    query the LLM for an action, step, then log [STEP]/[END].
    """
    if not API_KEY:
        print("ERROR: HF_TOKEN / API_KEY environment variable is not set.", file=sys.stderr)
        sys.exit(1)

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    # Verify the environment server is reachable before starting any episode.
    try:
        resp = requests.get(f"{ENV_URL}/health", timeout=10)
        resp.raise_for_status()
    except Exception as e:
        print(f"[DEBUG] Cannot reach environment at {ENV_URL}: {e}", flush=True)
        sys.exit(1)

    overall_scores = []

    for task_name in TASKS:
        log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)

        rewards: List[float] = []
        steps_taken = 0
        score = 0.0
        success = False

        try:
            # reset() — start a fresh episode for this task.
            reset_resp = requests.post(
                f"{ENV_URL}/reset",
                json={"task_id": task_name},
                timeout=30,
            )
            reset_resp.raise_for_status()
            obs = reset_resp.json()
            episode_id = obs["episode_id"]

            # Build prompt and get LLM action (always valid JSON — see fallback).
            user_prompt = build_user_prompt(obs.get("bug_report", {}), task_name)
            action_json_str = get_model_action(client, user_prompt, task_name)

            # step() — bug triage is a single-step environment.
            step = 1
            action_payload = json.loads(action_json_str)

            step_resp = requests.post(
                f"{ENV_URL}/step",
                json={"episode_id": episode_id, "action": action_payload},
                timeout=30,
            )
            step_resp.raise_for_status()
            step_data = step_resp.json()

            # Coerce defensively: a JSON null for reward/done would otherwise
            # reach log_step/log_end as None and crash the format specifiers
            # ({reward:.3f} / {r:.2f}) — with the rewards-list crash happening
            # inside `finally` and aborting the whole run.
            raw_reward = step_data.get("reward", 0.0)
            reward = float(raw_reward) if raw_reward is not None else 0.0
            done = bool(step_data.get("done", True))
            grader_score = step_data.get("grader_score", 0.0)

            rewards.append(reward)
            steps_taken = step

            log_step(step=step, action=action_json_str, reward=reward, done=done, error=None)

            # Clamp the grader score into [0, 1]; None counts as 0.
            score = grader_score if grader_score is not None else 0.0
            score = min(max(score, 0.0), 1.0)
            success = score >= 0.5

        except Exception as e:
            print(f"[DEBUG] Episode error: {e}", flush=True)
            success = False
            score = 0.0

        finally:
            # Always emit [END] so the evaluator sees a terminal line per task.
            log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
            overall_scores.append(score)

    # Summary across all tasks.
    final_mean = sum(overall_scores) / len(overall_scores) if overall_scores else 0.0
    print(f"\nFinal mean score: {final_mean:.4f}", flush=True)
210
+
211
+
212
  if __name__ == "__main__":
213
  main()