Spaces:
Sleeping
Sleeping
Commit ·
640bba9
1
Parent(s): 2a3cac2
Implement native urllib REST client to bypass sandbox missing dependencies and successfully hit LiteLLM proxy
Browse files- inference.py +97 -44
inference.py
CHANGED
|
@@ -275,43 +275,6 @@ def normalize_action(payload: dict[str, Any]) -> dict[str, Any]:
|
|
| 275 |
}
|
| 276 |
|
| 277 |
|
| 278 |
-
def build_llm_action(
    client: Any,
    obs: dict[str, Any],
    step: int,
    max_retries: int = 3,
) -> dict[str, Any]:
    """Ask the LLM for the next action and return it in normalized form.

    The user prompt is built from ``obs`` and ``step``, sent together with
    ``SYSTEM_PROMPT`` through ``client.chat.completions.create``, and the
    reply text is parsed with ``extract_json_object`` and passed through
    ``normalize_action``.

    Args:
        client: Object exposing ``chat.completions.create`` (OpenAI-style).
        obs: Current observation, forwarded to ``build_user_prompt``.
        step: Current step index, forwarded to ``build_user_prompt``.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The normalized action dictionary.

    Raises:
        RuntimeError: If every attempt fails; the last error is chained
            as ``__cause__``.
    """
    user_prompt = build_user_prompt(obs=obs, step=step)
    # The conversation is identical on every attempt, so build it once.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]

    last_error: Optional[Exception] = None
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS,
                stream=False,
            )
            raw_text = response.choices[0].message.content or ""
            return normalize_action(extract_json_object(raw_text))
        except Exception as llm_err:
            last_error = llm_err
            # Exponential backoff (1s, 2s, ...) only between attempts;
            # sleeping after the final failure would just delay the caller.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)

    raise RuntimeError(f"LLM call failed after retries: {last_error}") from last_error
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
def get_action(client: Any, obs: dict[str, Any], step: int) -> dict[str, Any]:
    """Return the next action, preferring the LLM and falling back to rules.

    Any exception raised by ``build_llm_action`` is reported on stderr and
    replaced by ``build_rule_action`` applied to the observation's
    ``code_snippet`` entry (empty string when the key is missing).

    Args:
        client: LLM client forwarded to ``build_llm_action``.
        obs: Current observation dictionary.
        step: Current step index.

    Returns:
        The action dictionary from the LLM, or the rule-based fallback.
    """
    try:
        return build_llm_action(client=client, obs=obs, step=step)
    except Exception as err:
        # Local import mirrors main(); stderr keeps stdout free for the
        # structured episode log.
        import sys

        print(
            f"[WARN] LLM action failed at step {step}, using rule-based fallback: {err}",
            file=sys.stderr,
        )
        return build_rule_action(obs.get("code_snippet", ""))
|
| 313 |
-
|
| 314 |
-
|
| 315 |
# ---------------------------------------------------------------------------
|
| 316 |
# Server readiness
|
| 317 |
# ---------------------------------------------------------------------------
|
|
@@ -330,6 +293,47 @@ def wait_for_server(timeout: int = 60) -> None:
|
|
| 330 |
raise RuntimeError(f"Server at {ENV_SERVER_URL} not ready after {timeout}s")
|
| 331 |
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
# ---------------------------------------------------------------------------
|
| 334 |
# Agent loop — one task episode
|
| 335 |
# ---------------------------------------------------------------------------
|
|
@@ -357,6 +361,7 @@ def run_task(client: Any, task_id: str) -> None:
|
|
| 357 |
if done:
|
| 358 |
break
|
| 359 |
|
|
|
|
| 360 |
action_payload = get_action(client=client, obs=obs, step=step)
|
| 361 |
action_str = json.dumps(action_payload, separators=(",", ":"))
|
| 362 |
|
|
@@ -387,25 +392,73 @@ def run_task(client: Any, task_id: str) -> None:
|
|
| 387 |
log_end(success=success, steps=steps_taken, score=final_score, rewards=rewards)
|
| 388 |
|
| 389 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
# ---------------------------------------------------------------------------
|
| 391 |
# Main
|
| 392 |
# ---------------------------------------------------------------------------
|
| 393 |
|
| 394 |
|
| 395 |
def main() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
client = None
|
| 397 |
try:
|
| 398 |
from openai import OpenAI
|
| 399 |
-
|
| 400 |
-
# Dynamically fetch at runtime to prevent caching via import loops
|
| 401 |
-
val_api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 402 |
-
val_api_key = os.environ.get("API_KEY") or os.environ.get("HF_TOKEN") or "missing-api-key"
|
| 403 |
-
|
| 404 |
client = OpenAI(base_url=val_api_base, api_key=val_api_key)
|
| 405 |
except Exception as e:
|
| 406 |
import sys
|
| 407 |
-
print(f"[WARN] Failed to
|
| 408 |
-
client =
|
| 409 |
|
| 410 |
wait_for_server(timeout=60)
|
| 411 |
|
|
|
|
| 275 |
}
|
| 276 |
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
# ---------------------------------------------------------------------------
|
| 279 |
# Server readiness
|
| 280 |
# ---------------------------------------------------------------------------
|
|
|
|
| 293 |
raise RuntimeError(f"Server at {ENV_SERVER_URL} not ready after {timeout}s")
|
| 294 |
|
| 295 |
|
| 296 |
+
# ---------------------------------------------------------------------------
|
| 297 |
+
# Pure urllib OpenAI Client Implementation
|
| 298 |
+
# ---------------------------------------------------------------------------
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
class PureUrllibOpenAIClient:
    """Minimal OpenAI-compatible chat client built only on ``urllib``.

    It POSTs a JSON chat-completion request to
    ``{base_url}/chat/completions`` with a bearer token and returns the
    text of the first choice.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 60.0):
        """Store the connection settings.

        Args:
            base_url: API root URL; trailing slashes are stripped.
            api_key: Token sent in the ``Authorization: Bearer`` header.
            timeout: Seconds to wait for the HTTP response.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.timeout = timeout

    @staticmethod
    def _extract_content(result: Any) -> str:
        """Return the first choice's message text, or ``""`` when absent.

        Tolerates a missing or empty ``choices`` list, a null ``message``
        and a null ``content`` so callers always receive a string.
        """
        if not isinstance(result, dict):
            return ""
        choices = result.get("choices")
        if not isinstance(choices, list) or not choices:
            return ""
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return content if isinstance(content, str) else ""

    def create_chat_completion(
        self,
        model: str,
        messages: list[dict[str, str]],
        temperature: float = 0.0,
        max_tokens: int = 512,
    ) -> str:
        """Send one non-streaming chat-completion request.

        Args:
            model: Model identifier placed in the request payload.
            messages: Chat messages (``role`` / ``content`` dictionaries).
            temperature: Sampling temperature placed in the payload.
            max_tokens: Completion token limit placed in the payload.

        Returns:
            The assistant message text of the first choice, or ``""`` when
            the response carries no text.

        Raises:
            RuntimeError: On an HTTP error status (message includes the
                status code and response body) or on any other failure
                while sending the request or decoding the response. The
                original exception is chained as ``__cause__``.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False,
        }
        data = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=data, method="POST")
        req.add_header("Content-Type", "application/json")
        req.add_header("Authorization", f"Bearer {self.api_key}")

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as response:
                result = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # Error bodies are not guaranteed to be valid UTF-8; never let
            # decoding mask the real HTTP failure.
            error_body = e.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"HTTP {e.code}: {error_body}") from e
        except Exception as e:
            raise RuntimeError(f"Proxy request failed: {e}") from e

        return self._extract_content(result)
|
| 335 |
+
|
| 336 |
+
|
| 337 |
# ---------------------------------------------------------------------------
|
| 338 |
# Agent loop — one task episode
|
| 339 |
# ---------------------------------------------------------------------------
|
|
|
|
| 361 |
if done:
|
| 362 |
break
|
| 363 |
|
| 364 |
+
# If client is our PureUrllib wrapper, adapt the payload build logic
|
| 365 |
action_payload = get_action(client=client, obs=obs, step=step)
|
| 366 |
action_str = json.dumps(action_payload, separators=(",", ":"))
|
| 367 |
|
|
|
|
| 392 |
log_end(success=success, steps=steps_taken, score=final_score, rewards=rewards)
|
| 393 |
|
| 394 |
|
| 395 |
+
def build_llm_action(
    client: Any,
    obs: dict[str, Any],
    step: int,
    max_retries: int = 3,
) -> dict[str, Any]:
    """Ask the LLM for the next action and return it in normalized form.

    The user prompt is built from ``obs`` and ``step`` and sent together
    with ``SYSTEM_PROMPT``. A ``PureUrllibOpenAIClient`` is called through
    ``create_chat_completion``; any other client is called through the
    OpenAI-style ``chat.completions.create``. The reply text is parsed
    with ``extract_json_object`` and passed through ``normalize_action``.

    Args:
        client: Either a ``PureUrllibOpenAIClient`` or an object exposing
            ``chat.completions.create``.
        obs: Current observation, forwarded to ``build_user_prompt``.
        step: Current step index, forwarded to ``build_user_prompt``.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The normalized action dictionary.

    Raises:
        RuntimeError: If every attempt fails; the last error is chained
            as ``__cause__``.
    """
    user_prompt = build_user_prompt(obs=obs, step=step)
    # The conversation is identical for both client types and for every
    # attempt, so build it once.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]

    last_error: Optional[Exception] = None
    for attempt in range(max_retries):
        try:
            if isinstance(client, PureUrllibOpenAIClient):
                raw_text = client.create_chat_completion(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_tokens=MAX_TOKENS,
                ) or ""  # guard against a null content in the response
            else:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_tokens=MAX_TOKENS,
                    stream=False,
                )
                raw_text = response.choices[0].message.content or ""

            return normalize_action(extract_json_object(raw_text))
        except Exception as llm_err:
            last_error = llm_err
            # Exponential backoff (1s, 2s, ...) only between attempts;
            # sleeping after the final failure would just delay the caller.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)

    raise RuntimeError(f"LLM call failed after retries: {last_error}") from last_error
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def get_action(client: Any, obs: dict[str, Any], step: int) -> dict[str, Any]:
    """Return the next action, preferring the LLM and falling back to rules.

    Any exception raised by ``build_llm_action`` is reported on stderr and
    replaced by ``build_rule_action`` applied to the observation's
    ``code_snippet`` entry (empty string when the key is missing).

    Args:
        client: LLM client forwarded to ``build_llm_action``.
        obs: Current observation dictionary.
        step: Current step index.

    Returns:
        The action dictionary from the LLM, or the rule-based fallback.
    """
    try:
        return build_llm_action(client=client, obs=obs, step=step)
    except Exception as err:
        # Local import mirrors main(); stderr keeps stdout free for the
        # structured episode log.
        import sys

        print(
            f"[WARN] LLM action failed at step {step}, using rule-based fallback: {err}",
            file=sys.stderr,
        )
        return build_rule_action(obs.get("code_snippet", ""))
|
| 442 |
+
|
| 443 |
+
|
| 444 |
# ---------------------------------------------------------------------------
|
| 445 |
# Main
|
| 446 |
# ---------------------------------------------------------------------------
|
| 447 |
|
| 448 |
|
| 449 |
def main() -> None:
|
| 450 |
+
# Dynamically fetch at runtime to prevent caching via import loops
|
| 451 |
+
val_api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 452 |
+
val_api_key = os.environ.get("API_KEY") or os.environ.get("HF_TOKEN") or "missing-api-key"
|
| 453 |
+
|
| 454 |
client = None
|
| 455 |
try:
|
| 456 |
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
client = OpenAI(base_url=val_api_base, api_key=val_api_key)
|
| 458 |
except Exception as e:
|
| 459 |
import sys
|
| 460 |
+
print(f"[WARN] Failed to load openai pip, using fallback urllib REST client: {e}", file=sys.stderr)
|
| 461 |
+
client = PureUrllibOpenAIClient(base_url=val_api_base, api_key=val_api_key)
|
| 462 |
|
| 463 |
wait_for_server(timeout=60)
|
| 464 |
|