Revanth-ml committed on
Commit
425b7fe
·
verified ·
1 Parent(s): bdf2d43

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +10 -11
inference.py CHANGED
@@ -49,17 +49,18 @@ from agentops_gym.models import ToolCall
49
  # ---------------------------------------------------------------------------
50
 
51
  IMAGE_NAME = os.getenv("IMAGE_NAME")
 
 
 
 
 
52
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
53
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
54
- API_KEY = os.getenv("HF_TOKEN")
55
  ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
56
 
57
- # Backward compatibility for API_KEY
58
- # API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
59
-
60
  BENCHMARK = "agentops-gym"
61
  MAX_STEPS = 10
62
- TEMPERATURE = 0.5
63
  MAX_TOKENS = 1024
64
  SUCCESS_SCORE_THRESHOLD = 0.5
65
 
@@ -109,6 +110,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
109
 
110
 
111
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
 
112
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
113
  print(
114
  f"[END] success={str(success).lower()} steps={steps} "
@@ -268,12 +270,13 @@ def run_task(client: OpenAI, task_id: str) -> Dict:
268
  score = float(meta.get("grader_score") or 0.0)
269
  if score == 0.0 and rewards:
270
  score = float(meta.get("cumulative_reward") or 0.0)
271
- score = max(0.0, min(score, 1.0))
272
  success = score >= SUCCESS_SCORE_THRESHOLD
273
 
274
  except Exception as e:
275
  print(f"[DEBUG] Task {task_id} error: {e}", flush=True)
276
  last_error = str(e)
 
277
 
278
  finally:
279
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
@@ -295,11 +298,7 @@ def main() -> None:
295
  print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
296
  sys.exit(1)
297
 
298
- # Force usage of environment variables for the OpenAI client
299
- api_key = os.environ["API_KEY"]
300
- api_base_url = os.environ["API_BASE_URL"]
301
-
302
- client = OpenAI(base_url=api_base_url, api_key=api_key)
303
 
304
  print("=" * 60, flush=True)
305
  print("AgentOps Gym — Baseline Inference", flush=True)
 
49
  # ---------------------------------------------------------------------------
50
 
51
  IMAGE_NAME = os.getenv("IMAGE_NAME")
52
+ API_KEY = (
53
+ os.getenv("HF_TOKEN")
54
+ or os.getenv("OPENAI_API_KEY")
55
+ or os.getenv("API_KEY")
56
+ )
57
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
58
  MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
 
59
  ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
60
 
 
 
 
61
  BENCHMARK = "agentops-gym"
62
  MAX_STEPS = 10
63
+ TEMPERATURE = 0.3
64
  MAX_TOKENS = 1024
65
  SUCCESS_SCORE_THRESHOLD = 0.5
66
 
 
110
 
111
 
112
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
113
+ score = max(0.001, min(0.999, score)) # must be strictly between 0 and 1
114
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
115
  print(
116
  f"[END] success={str(success).lower()} steps={steps} "
 
270
  score = float(meta.get("grader_score") or 0.0)
271
  if score == 0.0 and rewards:
272
  score = float(meta.get("cumulative_reward") or 0.0)
273
+ score = max(0.001, min(0.999, score))
274
  success = score >= SUCCESS_SCORE_THRESHOLD
275
 
276
  except Exception as e:
277
  print(f"[DEBUG] Task {task_id} error: {e}", flush=True)
278
  last_error = str(e)
279
+ score = 0.001 # never exactly 0.0
280
 
281
  finally:
282
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
298
  print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
299
  sys.exit(1)
300
 
301
+ client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
 
 
 
 
302
 
303
  print("=" * 60, flush=True)
304
  print("AgentOps Gym — Baseline Inference", flush=True)