Divyansh Agrawal committed on
Commit ·
4d4e8bd
1
Parent(s): 7d6383a
inference
Browse files
- inference.py +36 -21
inference.py
CHANGED
|
@@ -16,12 +16,15 @@ from AntiAtropos.models import ActionType, SREAction
|
|
| 16 |
|
| 17 |
load_dotenv()
|
| 18 |
|
| 19 |
-
API_BASE_URL = os.getenv("API_BASE_URL", "https://
|
| 20 |
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
|
| 21 |
-
API_KEY = os.getenv("
|
|
|
|
|
|
|
|
|
|
| 22 |
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
|
| 23 |
|
| 24 |
-
ENV_URL = os.getenv("ANTIATROPOS_ENV_URL", "https://pranavkk-antiatropos.hf.space")
|
| 25 |
ENV_MODE = os.getenv("ANTIATROPOS_MODE", "simulated")
|
| 26 |
TASK_NAME = os.getenv("ANTIATROPOS_TASK", "task-1")
|
| 27 |
BENCHMARK = os.getenv("ANTIATROPOS_BENCHMARK", "antiatropos")
|
|
@@ -67,10 +70,10 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
|
|
| 67 |
)
|
| 68 |
|
| 69 |
|
| 70 |
-
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 71 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 72 |
print(
|
| 73 |
-
f"[END] success={str(success).lower()} steps={steps} score={score:.
|
| 74 |
flush=True,
|
| 75 |
)
|
| 76 |
|
|
@@ -90,6 +93,10 @@ def _task_seed(base_seed: int, task_id: str) -> int:
|
|
| 90 |
return int(base_seed + offsets.get(task_id, 0))
|
| 91 |
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def _hf_web_fallback_url(base_url: str) -> str:
|
| 94 |
parsed = urlparse(base_url)
|
| 95 |
host = parsed.netloc.lower()
|
|
@@ -243,7 +250,7 @@ async def run_single_task(env: AntiAtroposEnv, client: AsyncOpenAI, task_id: str
|
|
| 243 |
log_step(step=step, action=action_str, reward=reward, done=bool(result.done), error=error)
|
| 244 |
|
| 245 |
grade = grader.score()
|
| 246 |
-
score =
|
| 247 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 248 |
return {
|
| 249 |
"task_id": task_id,
|
|
@@ -256,28 +263,36 @@ async def run_single_task(env: AntiAtroposEnv, client: AsyncOpenAI, task_id: str
|
|
| 256 |
|
| 257 |
async def run_all_tasks() -> None:
|
| 258 |
_seed_everything(SEED)
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
| 260 |
if not API_KEY:
|
| 261 |
-
raise RuntimeError("Missing API key (HF_TOKEN/
|
| 262 |
|
| 263 |
client = AsyncOpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 264 |
-
success = False
|
| 265 |
-
steps = 0
|
| 266 |
-
score = 0.0
|
| 267 |
-
rewards: List[float] = []
|
| 268 |
-
|
| 269 |
-
log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
|
| 270 |
|
| 271 |
try:
|
| 272 |
async with open_env_with_ws_fallback(ENV_URL, MESSAGE_TIMEOUT_S) as env:
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
finally:
|
| 279 |
await client.close()
|
| 280 |
-
log_end(success=success, steps=steps, score=score, rewards=rewards)
|
| 281 |
|
| 282 |
|
| 283 |
def main() -> None:
|
|
@@ -285,4 +300,4 @@ def main() -> None:
|
|
| 285 |
|
| 286 |
|
| 287 |
if __name__ == "__main__":
|
| 288 |
-
main()
|
|
|
|
| 16 |
|
| 17 |
load_dotenv()
|
| 18 |
|
| 19 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 20 |
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
|
| 21 |
+
API_KEY = os.getenv("API_KEY")
|
| 22 |
+
if not API_KEY:
|
| 23 |
+
# Local fallback to keep developer runs convenient.
|
| 24 |
+
API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
|
| 25 |
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
|
| 26 |
|
| 27 |
+
ENV_URL = os.getenv("ENV_URL") or os.getenv("ANTIATROPOS_ENV_URL", "https://pranavkk-antiatropos.hf.space")
|
| 28 |
ENV_MODE = os.getenv("ANTIATROPOS_MODE", "simulated")
|
| 29 |
TASK_NAME = os.getenv("ANTIATROPOS_TASK", "task-1")
|
| 30 |
BENCHMARK = os.getenv("ANTIATROPOS_BENCHMARK", "antiatropos")
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
|
| 73 |
+
def log_end(task: str, success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 74 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 75 |
print(
|
| 76 |
+
f"[END] task={task} success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
|
| 77 |
flush=True,
|
| 78 |
)
|
| 79 |
|
|
|
|
| 93 |
return int(base_seed + offsets.get(task_id, 0))
|
| 94 |
|
| 95 |
|
| 96 |
+
def _strict_score(score: float, eps: float = 0.001) -> float:
|
| 97 |
+
return min(1.0 - eps, max(eps, float(score)))
|
| 98 |
+
|
| 99 |
+
|
| 100 |
def _hf_web_fallback_url(base_url: str) -> str:
|
| 101 |
parsed = urlparse(base_url)
|
| 102 |
host = parsed.netloc.lower()
|
|
|
|
| 250 |
log_step(step=step, action=action_str, reward=reward, done=bool(result.done), error=error)
|
| 251 |
|
| 252 |
grade = grader.score()
|
| 253 |
+
score = _strict_score(float(grade.composite))
|
| 254 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 255 |
return {
|
| 256 |
"task_id": task_id,
|
|
|
|
| 263 |
|
| 264 |
async def run_all_tasks() -> None:
|
| 265 |
_seed_everything(SEED)
|
| 266 |
+
all_tasks = ["task-1", "task-2", "task-3"]
|
| 267 |
+
run_single = os.getenv("ANTIATROPOS_RUN_SINGLE_TASK", "false").lower() == "true"
|
| 268 |
+
task_id = TASK_NAME if TASK_NAME in set(all_tasks) else "task-1"
|
| 269 |
+
tasks_to_run = [task_id] if run_single else all_tasks
|
| 270 |
if not API_KEY:
|
| 271 |
+
raise RuntimeError("Missing API key (API_KEY/HF_TOKEN/OPENAI_API_KEY).")
|
| 272 |
|
| 273 |
client = AsyncOpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
try:
|
| 276 |
async with open_env_with_ws_fallback(ENV_URL, MESSAGE_TIMEOUT_S) as env:
|
| 277 |
+
for task in tasks_to_run:
|
| 278 |
+
success = False
|
| 279 |
+
steps = 0
|
| 280 |
+
score = 0.001
|
| 281 |
+
rewards: List[float] = []
|
| 282 |
+
log_start(task=task, env=BENCHMARK, model=MODEL_NAME)
|
| 283 |
+
try:
|
| 284 |
+
report = await run_single_task(env=env, client=client, task_id=task)
|
| 285 |
+
success = bool(report["success"])
|
| 286 |
+
steps = int(report["steps"])
|
| 287 |
+
score = _strict_score(float(report["score"]))
|
| 288 |
+
rewards = list(report["rewards"])
|
| 289 |
+
except Exception as exc:
|
| 290 |
+
print(f"[DEBUG] task={task} error={exc}", flush=True)
|
| 291 |
+
score = 0.001
|
| 292 |
+
finally:
|
| 293 |
+
log_end(task=task, success=success, steps=steps, score=score, rewards=rewards)
|
| 294 |
finally:
|
| 295 |
await client.close()
|
|
|
|
| 296 |
|
| 297 |
|
| 298 |
def main() -> None:
|
|
|
|
| 300 |
|
| 301 |
|
| 302 |
if __name__ == "__main__":
|
| 303 |
+
main()
|