Souravdanyal committed on
Commit
2efa047
·
1 Parent(s): 29416b7

working..

Browse files
inference.py CHANGED
@@ -2,75 +2,86 @@
2
  """
3
  inference.py - Code Debug Environment Baseline Agent
4
 
5
- Required env vars: API_BASE_URL, MODEL_NAME, and one of API_KEY/GROQ_API_KEY/OPENAI_API_KEY/HF_TOKEN
6
  Usage:
7
  python inference.py
8
  python inference.py --url https://Souravdanyal-code-debug-env.hf.space
9
  python inference.py --difficulty easy
10
 
11
- STDOUT FORMAT (strictly required by evaluator - JSON):
12
- {"type": "START", "task": "<id>", "env": "<benchmark>", "model": "<model>"}
13
- {"type": "STEP", "step": <n>, "action": "<str>", "reward": <0.00>, "done": <bool>, "error": <msg|null>}
14
- {"type": "END", "success": <bool>, "steps": <n>, "score": <0.000>, "rewards": [<r1>, <r2>, ...]}
15
  """
16
 
17
  import os, sys, json, time, argparse, requests, re
18
  from openai import OpenAI
19
  from typing import List, Optional
20
 
 
 
 
 
 
 
21
 
22
- def _read_env(*names: str) -> tuple[str, Optional[str]]:
23
- """Return first non-empty env value and the matched variable name."""
24
- for name in names:
25
- for candidate in (name, name.lower()):
26
- val = os.environ.get(candidate)
27
- if val and val.strip():
28
- return val.strip(), candidate
29
- return "", None
30
 
31
  # ── Config ────────────────────────────────────────────────────────────────────
32
- API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.groq.com/openai/v1")
33
- MODEL_NAME = os.environ.get("MODEL_NAME", "llama-3.1-8b-instant")
34
-
35
- # Accept common provider key names, including lowercase variants.
36
- API_KEY, API_KEY_SOURCE = _read_env("API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "HF_TOKEN")
37
- ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
 
 
 
 
38
  BENCHMARK = "code-debug-env"
39
  MAX_STEPS = 5
40
  SUCCESS_SCORE_THRESHOLD = 0.5
41
 
42
- client = OpenAI(api_key=API_KEY or "dummy", base_url=API_BASE_URL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # ── Logging — STRICT JSON FORMAT ─────────────────────────────────────────────
45
  def log_start(task_id: str, env: str, model: str) -> None:
46
- log_entry = {
47
- "type": "START",
48
- "task": task_id,
49
- "env": env,
50
- "model": model
51
- }
52
- print(json.dumps(log_entry), flush=True)
53
 
54
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
55
- log_entry = {
56
- "type": "STEP",
57
- "step": step,
58
- "action": action,
59
- "reward": round(reward, 2),
60
- "done": done,
61
- "error": error
62
- }
63
- print(json.dumps(log_entry), flush=True)
64
 
65
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
66
- log_entry = {
67
- "type": "END",
68
- "success": success,
69
- "steps": steps,
70
- "score": round(score, 3),
71
- "rewards": [round(r, 2) for r in rewards]
72
- }
73
- print(json.dumps(log_entry), flush=True)
74
 
75
  # ── Env client ────────────────────────────────────────────────────────────────
76
  def env_reset(url: str, difficulty: str) -> dict:
@@ -273,19 +284,19 @@ def run_episode(env_url: str, difficulty: str) -> tuple:
273
  # ── Main ──────────────────────────────────────────────────────────────────────
274
  def main():
275
  parser = argparse.ArgumentParser(description="Code Debug Environment Baseline Agent")
276
- parser.add_argument("--url", default=ENV_URL)
277
  parser.add_argument("--difficulty", default=None, choices=["easy", "medium", "hard", "all"])
278
  args = parser.parse_args()
279
  url = args.url.rstrip("/")
280
 
281
- if not API_KEY:
282
  print(
283
- "# Missing API key. Set one of: API_KEY, GROQ_API_KEY, OPENAI_API_KEY, HF_TOKEN (or lowercase variants)",
284
  file=sys.stderr,
285
  flush=True,
286
  )
287
  sys.exit(1)
288
- print(f"# Using API key from {API_KEY_SOURCE}", file=sys.stderr, flush=True)
289
 
290
  # Health check
291
  try:
@@ -314,4 +325,5 @@ def main():
314
 
315
 
316
  if __name__ == "__main__":
317
- main()
 
 
2
  """
3
  inference.py - Code Debug Environment Baseline Agent
4
 
5
+ Required env vars: API_BASE_URL, MODEL_NAME, HF_TOKEN
6
  Usage:
7
  python inference.py
8
  python inference.py --url https://Souravdanyal-code-debug-env.hf.space
9
  python inference.py --difficulty easy
10
 
11
+ STDOUT FORMAT (strictly required by evaluator - plaintext):
12
+ [START] task=<id> env=<benchmark> model=<model>
13
+ [STEP] step=<n> action=<str> reward=<0.00> done=<true|false> error=<msg|null>
14
+ [END] success=<true|false> steps=<n> score=<0.000> rewards=[<r1>,<r2>,...]
15
  """
16
 
17
  import os, sys, json, time, argparse, requests, re
18
  from openai import OpenAI
19
  from typing import List, Optional
20
 
21
+ # Load .env file if it exists
22
+ try:
23
+ from dotenv import load_dotenv
24
+ load_dotenv()
25
+ except ImportError:
26
+ pass # dotenv not installed, will use system env vars
27
 
 
 
 
 
 
 
 
 
28
 
29
  # ── Config ────────────────────────────────────────────────────────────────────
30
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://api.groq.com/openai/v1")
31
+ MODEL_NAME = os.getenv("MODEL_NAME", "llama-3.1-8b-instant")
32
+ HF_TOKEN = os.getenv("HF_TOKEN")
33
+ HF_TOKEN_SOURCE = "HF_TOKEN"
34
+ if not HF_TOKEN:
35
+ HF_TOKEN = os.getenv("hf_token")
36
+ HF_TOKEN_SOURCE = "hf_token"
37
+ # Optional when using from_docker_image():
38
+ LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
39
+ ENV_URL = os.getenv("ENV_URL")
40
  BENCHMARK = "code-debug-env"
41
  MAX_STEPS = 5
42
  SUCCESS_SCORE_THRESHOLD = 0.5
43
 
44
+ client = OpenAI(api_key=HF_TOKEN or "dummy", base_url=API_BASE_URL)
45
+
46
+ # ── Logging — STRICT PLAINTEXT FORMAT ────────────────────────────────────────
47
+ def _format_bool(value: bool) -> str:
48
+ return "true" if value else "false"
49
+
50
+
51
+ def _normalize_token(value: str) -> str:
52
+ return re.sub(r"\s+", " ", str(value)).strip()
53
+
54
+
55
+ def _format_error(error: Optional[str]) -> str:
56
+ if error is None:
57
+ return "null"
58
+ cleaned = _normalize_token(error)
59
+ return cleaned if cleaned else "null"
60
+
61
+
62
+ def _format_rewards(rewards: List[float]) -> str:
63
+ return "[" + ",".join(f"{round(r, 2):.2f}" for r in rewards) + "]"
64
+
65
 
 
66
  def log_start(task_id: str, env: str, model: str) -> None:
67
+ print(
68
+ f"[START] task={_normalize_token(task_id)} env={_normalize_token(env)} model={_normalize_token(model)}",
69
+ flush=True,
70
+ )
 
 
 
71
 
72
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
73
+ print(
74
+ f"[STEP] step={step} action={_normalize_token(action)} reward={round(reward, 2):.2f} "
75
+ f"done={_format_bool(done)} error={_format_error(error)}",
76
+ flush=True,
77
+ )
 
 
 
 
78
 
79
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
80
+ print(
81
+ f"[END] success={_format_bool(success)} steps={steps} score={round(score, 3):.3f} "
82
+ f"rewards={_format_rewards(rewards)}",
83
+ flush=True,
84
+ )
 
 
 
85
 
86
  # ── Env client ────────────────────────────────────────────────────────────────
87
  def env_reset(url: str, difficulty: str) -> dict:
 
284
  # ── Main ──────────────────────────────────────────────────────────────────────
285
  def main():
286
  parser = argparse.ArgumentParser(description="Code Debug Environment Baseline Agent")
287
+ parser.add_argument("--url", default=ENV_URL or "http://localhost:7860")
288
  parser.add_argument("--difficulty", default=None, choices=["easy", "medium", "hard", "all"])
289
  args = parser.parse_args()
290
  url = args.url.rstrip("/")
291
 
292
+ if not HF_TOKEN:
293
  print(
294
+ "# Missing HF_TOKEN (or lowercase hf_token).",
295
  file=sys.stderr,
296
  flush=True,
297
  )
298
  sys.exit(1)
299
+ print(f"# Using API key from {HF_TOKEN_SOURCE}", file=sys.stderr, flush=True)
300
 
301
  # Health check
302
  try:
 
325
 
326
 
327
  if __name__ == "__main__":
328
+ main()
329
+
validator/__pycache__/pre_submit_check.cpython-39.pyc CHANGED
Binary files a/validator/__pycache__/pre_submit_check.cpython-39.pyc and b/validator/__pycache__/pre_submit_check.cpython-39.pyc differ
 
validator/pre_submit_check.py CHANGED
@@ -153,13 +153,15 @@ def run_checks(base_url: str):
153
  try:
154
  with open("inference.py") as f:
155
  content = f.read()
156
- has_start = '"type": "START"' in content
157
- has_step = '"type": "STEP"' in content
158
- has_end = '"type": "END"' in content
 
159
  check("inference.py emits [START] logs", has_start)
160
  check("inference.py emits [STEP] logs", has_step)
161
  check("inference.py emits [END] logs", has_end)
162
- all_passed &= has_start and has_step and has_end
 
163
  except Exception as e:
164
  check("inference.py log format", False, str(e))
165
  all_passed = False
 
153
  try:
154
  with open("inference.py") as f:
155
  content = f.read()
156
+ has_start = "[START] task=" in content
157
+ has_step = "[STEP] step=" in content
158
+ has_end = "[END] success=" in content
159
+ avoids_json_logs = "print(json.dumps(log_entry)" not in content
160
  check("inference.py emits [START] logs", has_start)
161
  check("inference.py emits [STEP] logs", has_step)
162
  check("inference.py emits [END] logs", has_end)
163
+ check("inference.py avoids JSON log dict dumps", avoids_json_logs)
164
+ all_passed &= has_start and has_step and has_end and avoids_json_logs
165
  except Exception as e:
166
  check("inference.py log format", False, str(e))
167
  all_passed = False