Spaces:

Revanth-ml
/

agentops-gym

Sleeping

App Files Files Community

Revanth-ml committed on Apr 9

Commit

425b7fe

verified ·

1 Parent(s): bdf2d43

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

inference.py +10 -11

inference.py CHANGED Viewed

@@ -49,17 +49,18 @@ from agentops_gym.models import ToolCall
 # ---------------------------------------------------------------------------
 IMAGE_NAME   = os.getenv("IMAGE_NAME")
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
-API_KEY     = os.getenv("HF_TOKEN")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
-# Backward compatibility for API_KEY
-# API_KEY      = os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
 BENCHMARK               = "agentops-gym"
 MAX_STEPS               = 10
-TEMPERATURE             = 0.5
 MAX_TOKENS              = 1024
 SUCCESS_SCORE_THRESHOLD = 0.5
@@ -109,6 +110,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
 def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
     print(
         f"[END] success={str(success).lower()} steps={steps} "
@@ -268,12 +270,13 @@ def run_task(client: OpenAI, task_id: str) -> Dict:
         score = float(meta.get("grader_score") or 0.0)
         if score == 0.0 and rewards:
             score = float(meta.get("cumulative_reward") or 0.0)
-        score   = max(0.0, min(score, 1.0))
         success = score >= SUCCESS_SCORE_THRESHOLD
     except Exception as e:
         print(f"[DEBUG] Task {task_id} error: {e}", flush=True)
         last_error = str(e)
     finally:
         log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
@@ -295,11 +298,7 @@ def main() -> None:
         print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
         sys.exit(1)
-    # Force usage of environment variables for the OpenAI client
-    api_key = os.environ["API_KEY"]
-    api_base_url = os.environ["API_BASE_URL"]
-    client = OpenAI(base_url=api_base_url, api_key=api_key)
     print("=" * 60, flush=True)
     print("AgentOps Gym — Baseline Inference", flush=True)

 # ---------------------------------------------------------------------------
 IMAGE_NAME   = os.getenv("IMAGE_NAME")
+API_KEY      = (
+    os.getenv("HF_TOKEN")
+    or os.getenv("OPENAI_API_KEY")
+    or os.getenv("API_KEY")
+)
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
 ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
 BENCHMARK               = "agentops-gym"
 MAX_STEPS               = 10
+TEMPERATURE             = 0.3
 MAX_TOKENS              = 1024
 SUCCESS_SCORE_THRESHOLD = 0.5
 def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
+    score = max(0.001, min(0.999, score))  # must be strictly between 0 and 1
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
     print(
         f"[END] success={str(success).lower()} steps={steps} "
         score = float(meta.get("grader_score") or 0.0)
         if score == 0.0 and rewards:
             score = float(meta.get("cumulative_reward") or 0.0)
+        score   = max(0.001, min(0.999, score))
         success = score >= SUCCESS_SCORE_THRESHOLD
     except Exception as e:
         print(f"[DEBUG] Task {task_id} error: {e}", flush=True)
         last_error = str(e)
+        score = 0.001  # never exactly 0.0
     finally:
         log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
         print("ERROR: Set HF_TOKEN, OPENAI_API_KEY, or API_KEY.", file=sys.stderr)
         sys.exit(1)
+    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
     print("=" * 60, flush=True)
     print("AgentOps Gym — Baseline Inference", flush=True)