| import asyncio |
| import os |
| from typing import List, Optional |
|
|
| from openai import OpenAI |
|
|
| from client import TaskmanagerEnv |
| from models import TaskmanagerAction |
| from grader import compute_score |
|
|
|
|
| |
|
|
# Settings for the OpenAI-compatible endpoint. Each one is read from the
# environment variable of the same name and falls back to the default shown.
API_KEY = os.getenv("API_KEY", "dummy")
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:4000")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Fixed run parameters.
BENCHMARK = "taskmanager"  # reported as env= in the [START] line
MAX_STEPS = 20  # upper bound on environment steps per task
SUCCESS_SCORE_THRESHOLD = 0.6  # a task succeeds when its score reaches this
|
|
| |
|
|
|
|
def log_start(task: str, env: str, model: str):
    """Print the ``[START]`` line that opens a task run.

    Args:
        task: Name of the task being started.
        env: Name of the environment/benchmark.
        model: Name of the model in use.
    """
    header = f"[START] task={task} env={env} model={model}"
    print(header, flush=True)
|
|
|
|
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]):
    """Print one ``[STEP]`` line describing a single environment step.

    Args:
        step: 1-based step number.
        action: Label of the action that was taken.
        reward: Reward for the step; printed with two decimals.
        done: Whether the episode finished; printed lower-cased.
        error: Error text, or a falsy value, which is printed as ``null``.
    """
    done_text = str(done).lower()
    error_text = error or "null"
    line = " ".join(
        (
            "[STEP]",
            f"step={step}",
            f"action={action}",
            f"reward={reward:.2f}",
            f"done={done_text}",
            f"error={error_text}",
        )
    )
    print(line, flush=True)
|
|
|
|
def log_end(success: bool, steps: int, score: float, rewards: List[float]):
    """Print the ``[END]`` line that closes a task run.

    Args:
        success: Whether the task counted as a success; printed lower-cased.
        steps: Number of steps taken.
        score: Final score; printed with three decimals.
        rewards: Per-step rewards; printed comma-separated with two decimals.
    """
    formatted_rewards = [format(value, ".2f") for value in rewards]
    summary = (
        f"[END] success={str(success).lower()} steps={steps} "
        f"score={score:.3f} rewards={','.join(formatted_rewards)}"
    )
    print(summary, flush=True)
|
|
|
|
| |
|
|
|
|
def choose_best_ticket(tickets):
    """Return the ``id`` of the highest-ranked ticket, or ``None`` if there are none.

    Tickets are ranked by, in order:

    1. type: ``bug`` > ``feature`` > ``enhancement`` > any other value,
    2. larger ``priority``,
    3. smaller ``deadline``.

    When several tickets share the top rank, the one that appears first in
    ``tickets`` wins.

    Args:
        tickets: Sequence of mappings with the keys ``id``, ``type``,
            ``priority`` and ``deadline``. May be empty or ``None``.

    Returns:
        The ``id`` value of the chosen ticket, or ``None`` for empty input.

    Raises:
        KeyError: If a ticket lacks one of the required keys.
    """
    if not tickets:
        return None

    # Built once per call rather than once per ticket inside the key function.
    type_rank = {"bug": 3, "feature": 2, "enhancement": 1}

    def rank(ticket):
        return (
            type_rank.get(ticket["type"], 0),
            ticket["priority"],
            -ticket["deadline"],  # negated so an earlier deadline ranks higher
        )

    # max() is a single pass and returns the first maximal element, which is
    # the same ticket a stable descending sort would place at index 0.
    return max(tickets, key=rank)["id"]
|
|
|
|
| |
|
|
|
|
def _debug(message):
    """Write a diagnostic to stderr, leaving stdout to the [START]/[STEP]/[END] lines."""
    import sys  # local import: only this helper needs it

    print(f"[DEBUG] {message}", file=sys.stderr, flush=True)


def _ping_model(client):
    """Send a one-token chat request to ``MODEL_NAME`` and discard the reply.

    The request is best-effort: a failure is reported on stderr and otherwise
    ignored, so an unreachable model endpoint never aborts a run.
    """
    try:
        client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": "hello"}],
            max_tokens=1,
        )
    except Exception as exc:
        _debug(f"model ping failed: {exc}")


async def _run_task(env, client, task_name):
    """Run one episode on ``env`` and print its [START], [STEP] and [END] lines.

    The [END] line is printed from a ``finally`` block, so every [START] gets a
    matching [END] even when the environment or the scorer raises. In that
    case it reports the steps and rewards collected so far, with ``score=0.0``
    and ``success=False``. The exception itself still propagates to the caller.
    """
    rewards: List[float] = []
    steps_taken = 0
    success = False
    score = 0.0

    log_start(task_name, BENCHMARK, MODEL_NAME)
    try:
        # NOTE(review): reset() receives no task name, so from this code alone
        # every task appears to run the same episode. Confirm whether the
        # environment selects the task some other way.
        result = await env.reset()
        obs = result.observation

        _ping_model(client)

        for step in range(1, MAX_STEPS + 1):
            # Covers both "reset() returned a finished episode" and "the
            # previous step finished it".
            if result.done:
                break

            ticket_id = choose_best_ticket(obs.tasks)
            if ticket_id is None:  # nothing left to work on
                break

            result = await env.step(TaskmanagerAction(task_id=ticket_id))
            obs = result.observation

            reward = result.reward or 0.0  # a missing reward counts as zero
            rewards.append(reward)
            steps_taken = step

            log_step(
                step=step,
                action=f"resolve_ticket_{ticket_id}",
                reward=reward,
                done=result.done,
                error=None,
            )

        # presumably the largest reward a single step can yield; confirm
        # against the environment's reward scheme
        max_per_step = 15
        # NOTE(review): with no steps taken the second argument is 0; whether
        # compute_score accepts that is not visible from here.
        score = compute_score(sum(rewards), len(rewards) * max_per_step)
        success = score >= SUCCESS_SCORE_THRESHOLD
    finally:
        log_end(success, steps_taken, score, rewards)


async def main():
    """Run every benchmark task in sequence against a local Taskmanager environment.

    Creates the OpenAI client and the environment client, runs the three
    tasks one after another on the same environment, and always attempts to
    close the environment afterwards. A failure while closing is reported on
    stderr and otherwise ignored.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url="http://localhost:8000")

    try:
        for task_name in ("task-1-easy", "task-2-medium", "task-3-hard"):
            await _run_task(env, client, task_name)
    finally:
        try:
            await env.close()
        except Exception as exc:
            _debug(f"env.close() failed: {exc}")
|
|
|
|
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, not on import.
    asyncio.run(main())
|
|