Divyansh Agrawal committed on
Commit
4d4e8bd
·
1 Parent(s): 7d6383a

inference

Browse files
Files changed (1) hide show
  1. inference.py +36 -21
inference.py CHANGED
@@ -16,12 +16,15 @@ from AntiAtropos.models import ActionType, SREAction
16
 
17
  load_dotenv()
18
 
19
- API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
20
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
21
- API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
 
 
 
22
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
23
 
24
- ENV_URL = os.getenv("ANTIATROPOS_ENV_URL", "https://pranavkk-antiatropos.hf.space")
25
  ENV_MODE = os.getenv("ANTIATROPOS_MODE", "simulated")
26
  TASK_NAME = os.getenv("ANTIATROPOS_TASK", "task-1")
27
  BENCHMARK = os.getenv("ANTIATROPOS_BENCHMARK", "antiatropos")
@@ -67,10 +70,10 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
67
  )
68
 
69
 
70
- def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
71
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
72
  print(
73
- f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}",
74
  flush=True,
75
  )
76
 
@@ -90,6 +93,10 @@ def _task_seed(base_seed: int, task_id: str) -> int:
90
  return int(base_seed + offsets.get(task_id, 0))
91
 
92
 
 
 
 
 
93
  def _hf_web_fallback_url(base_url: str) -> str:
94
  parsed = urlparse(base_url)
95
  host = parsed.netloc.lower()
@@ -243,7 +250,7 @@ async def run_single_task(env: AntiAtroposEnv, client: AsyncOpenAI, task_id: str
243
  log_step(step=step, action=action_str, reward=reward, done=bool(result.done), error=error)
244
 
245
  grade = grader.score()
246
- score = max(0.0, min(1.0, float(grade.composite)))
247
  success = score >= SUCCESS_SCORE_THRESHOLD
248
  return {
249
  "task_id": task_id,
@@ -256,28 +263,36 @@ async def run_single_task(env: AntiAtroposEnv, client: AsyncOpenAI, task_id: str
256
 
257
  async def run_all_tasks() -> None:
258
  _seed_everything(SEED)
259
- task_id = TASK_NAME if TASK_NAME in {"task-1", "task-2", "task-3"} else "task-1"
 
 
 
260
  if not API_KEY:
261
- raise RuntimeError("Missing API key (HF_TOKEN/API_KEY).")
262
 
263
  client = AsyncOpenAI(base_url=API_BASE_URL, api_key=API_KEY)
264
- success = False
265
- steps = 0
266
- score = 0.0
267
- rewards: List[float] = []
268
-
269
- log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
270
 
271
  try:
272
  async with open_env_with_ws_fallback(ENV_URL, MESSAGE_TIMEOUT_S) as env:
273
- report = await run_single_task(env=env, client=client, task_id=task_id)
274
- success = bool(report["success"])
275
- steps = int(report["steps"])
276
- score = float(report["score"])
277
- rewards = list(report["rewards"])
 
 
 
 
 
 
 
 
 
 
 
 
278
  finally:
279
  await client.close()
280
- log_end(success=success, steps=steps, score=score, rewards=rewards)
281
 
282
 
283
  def main() -> None:
@@ -285,4 +300,4 @@ def main() -> None:
285
 
286
 
287
  if __name__ == "__main__":
288
- main()
 
16
 
17
  load_dotenv()
18
 
19
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
20
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
21
+ API_KEY = os.getenv("API_KEY")
22
+ if not API_KEY:
23
+ # Local fallback to keep developer runs convenient.
24
+ API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
25
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
26
 
27
+ ENV_URL = os.getenv("ENV_URL") or os.getenv("ANTIATROPOS_ENV_URL", "https://pranavkk-antiatropos.hf.space")
28
  ENV_MODE = os.getenv("ANTIATROPOS_MODE", "simulated")
29
  TASK_NAME = os.getenv("ANTIATROPOS_TASK", "task-1")
30
  BENCHMARK = os.getenv("ANTIATROPOS_BENCHMARK", "antiatropos")
 
70
  )
71
 
72
 
73
+ def log_end(task: str, success: bool, steps: int, score: float, rewards: List[float]) -> None:
74
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
75
  print(
76
+ f"[END] task={task} success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
77
  flush=True,
78
  )
79
 
 
93
  return int(base_seed + offsets.get(task_id, 0))
94
 
95
 
96
+ def _strict_score(score: float, eps: float = 0.001) -> float:
97
+ return min(1.0 - eps, max(eps, float(score)))
98
+
99
+
100
  def _hf_web_fallback_url(base_url: str) -> str:
101
  parsed = urlparse(base_url)
102
  host = parsed.netloc.lower()
 
250
  log_step(step=step, action=action_str, reward=reward, done=bool(result.done), error=error)
251
 
252
  grade = grader.score()
253
+ score = _strict_score(float(grade.composite))
254
  success = score >= SUCCESS_SCORE_THRESHOLD
255
  return {
256
  "task_id": task_id,
 
263
 
264
  async def run_all_tasks() -> None:
265
  _seed_everything(SEED)
266
+ all_tasks = ["task-1", "task-2", "task-3"]
267
+ run_single = os.getenv("ANTIATROPOS_RUN_SINGLE_TASK", "false").lower() == "true"
268
+ task_id = TASK_NAME if TASK_NAME in set(all_tasks) else "task-1"
269
+ tasks_to_run = [task_id] if run_single else all_tasks
270
  if not API_KEY:
271
+ raise RuntimeError("Missing API key (API_KEY/HF_TOKEN/OPENAI_API_KEY).")
272
 
273
  client = AsyncOpenAI(base_url=API_BASE_URL, api_key=API_KEY)
 
 
 
 
 
 
274
 
275
  try:
276
  async with open_env_with_ws_fallback(ENV_URL, MESSAGE_TIMEOUT_S) as env:
277
+ for task in tasks_to_run:
278
+ success = False
279
+ steps = 0
280
+ score = 0.001
281
+ rewards: List[float] = []
282
+ log_start(task=task, env=BENCHMARK, model=MODEL_NAME)
283
+ try:
284
+ report = await run_single_task(env=env, client=client, task_id=task)
285
+ success = bool(report["success"])
286
+ steps = int(report["steps"])
287
+ score = _strict_score(float(report["score"]))
288
+ rewards = list(report["rewards"])
289
+ except Exception as exc:
290
+ print(f"[DEBUG] task={task} error={exc}", flush=True)
291
+ score = 0.001
292
+ finally:
293
+ log_end(task=task, success=success, steps=steps, score=score, rewards=rewards)
294
  finally:
295
  await client.close()
 
296
 
297
 
298
  def main() -> None:
 
300
 
301
 
302
  if __name__ == "__main__":
303
+ main()