"""Baseline inference script for the ``taskmanager`` benchmark.

For every configured task the script resets a ``TaskmanagerEnv``, repeatedly
resolves the ticket picked by a fixed rule-based policy, and prints
``[START]`` / ``[STEP]`` / ``[END]`` lines to stdout. Diagnostics that are not
part of that log format go to stderr.
"""

import asyncio
import os
import sys
from typing import List, Optional

from openai import OpenAI

from client import TaskmanagerEnv
from models import TaskmanagerAction
from grader import compute_score

# ================= CONFIG =================
API_KEY = os.environ.get("API_KEY", "dummy")
API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:4000")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
# Address of the environment server; the default is the previously
# hard-coded value, so behaviour is unchanged unless the variable is set.
ENV_BASE_URL = os.environ.get("ENV_BASE_URL", "http://localhost:8000")

BENCHMARK = "taskmanager"
TASKS = ("task-1-easy", "task-2-medium", "task-3-hard")
MAX_STEPS = 20
# Per-step reward ceiling used to build the grader's denominator.
# NOTE(review): 15 is taken as-is from the original script; confirm it
# matches the environment's real maximum reward.
MAX_REWARD_PER_STEP = 15
SUCCESS_SCORE_THRESHOLD = 0.6

# Ranking of ticket types used by the policy; unknown types rank 0.
_TYPE_RANK = {"bug": 3, "feature": 2, "enhancement": 1}


# ================= LOGGING =================
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line that opens one task run."""
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(
    step: int, action: str, reward: float, done: bool, error: Optional[str]
) -> None:
    """Print one ``[STEP]`` line.

    ``reward`` is shown with two decimals, ``done`` as ``true``/``false``,
    and a falsy ``error`` (``None`` or ``""``) as the literal ``null``.
    """
    error_val = error if error else "null"
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}",
        flush=True,
    )


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` line that closes one task run.

    ``rewards`` is rendered as a comma-separated list with two decimals each
    (an empty list yields an empty field); ``score`` uses three decimals.
    """
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(
        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
        flush=True,
    )


# ================= SMART POLICY =================
def _ticket_sort_key(ticket):
    """Return the ordering key for *ticket*; larger keys are preferred.

    Compared in order: type rank, then ``priority`` (higher first), then
    ``deadline`` (smaller first, hence the negation).
    """
    return (
        _TYPE_RANK.get(ticket["type"], 0),
        ticket["priority"],
        -ticket["deadline"],
    )


def choose_best_ticket(tickets):
    """Return the ``id`` of the preferred ticket, or ``None`` if there is none.

    Each ticket is indexed by the keys ``"type"``, ``"priority"``,
    ``"deadline"`` and ``"id"``. Among tickets with equal keys the one that
    appears first in *tickets* wins.
    """
    if not tickets:
        return None
    # max() returns the first maximal element, which is the same element the
    # previous stable ``sorted(..., reverse=True)[0]`` selected.
    return max(tickets, key=_ticket_sort_key)["id"]


# ================= MAIN =================
def _ping_model(llm_client) -> None:
    """Send a one-token ``hello`` request to the configured model, best effort.

    The response is discarded and ticket selection never consults the model.
    A failure is reported on stderr instead of being silently swallowed, and
    never aborts the run.
    NOTE(review): the purpose of this call is not visible in this file
    (presumably it proves the endpoint is reachable) -- confirm before removing.
    """
    try:
        # Synchronous call: it blocks the event loop, which is harmless here
        # because nothing else is scheduled concurrently.
        llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": "hello"}],
            max_tokens=1,
        )
    except Exception as exc:
        print(f"[WARN] model ping failed: {exc}", file=sys.stderr, flush=True)


async def _run_task(env, llm_client, task_name: str) -> None:
    """Run one episode for *task_name* and emit its START/STEP/END lines.

    The ``[END]`` line is always printed, even when the environment raises
    part-way through; in that case it reports ``success=false`` and
    ``score=0.000`` together with the steps and rewards collected so far,
    and the exception then propagates to the caller.
    """
    rewards: List[float] = []
    steps_taken = 0
    success = False
    score = 0.0

    log_start(task_name, BENCHMARK, MODEL_NAME)
    try:
        # NOTE(review): reset() is called without the task name, so every
        # task starts from whatever the server produces by default -- confirm
        # that this is intended.
        result = await env.reset()
        obs = result.observation

        _ping_model(llm_client)

        for step in range(1, MAX_STEPS + 1):
            # Covers both "already done after reset" and "done after the
            # previous step".
            if result.done:
                break

            ticket_id = choose_best_ticket(obs.tasks)
            if ticket_id is None:
                break

            result = await env.step(TaskmanagerAction(task_id=ticket_id))
            obs = result.observation
            reward = result.reward or 0.0  # a missing reward counts as 0.0

            rewards.append(reward)
            steps_taken = step
            log_step(
                step=step,
                action=f"resolve_ticket_{ticket_id}",
                reward=reward,
                done=result.done,
                error=None,
            )

        # ================= GRADER =================
        max_possible = len(rewards) * MAX_REWARD_PER_STEP
        # With zero steps the denominator would be 0; keep the 0.0 default
        # rather than handing the grader a possible division by zero.
        if max_possible > 0:
            score = compute_score(sum(rewards), max_possible)
        success = score >= SUCCESS_SCORE_THRESHOLD
    finally:
        log_end(success, steps_taken, score, rewards)


async def main() -> None:
    """Run every task in ``TASKS`` against one shared environment client.

    The environment is closed on exit whether or not a task raised; a failure
    while closing is reported on stderr and otherwise ignored.
    """
    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url=ENV_BASE_URL)

    try:
        for task_name in TASKS:
            await _run_task(env, llm_client, task_name)
    finally:
        try:
            await env.close()
        except Exception as exc:
            print(f"[WARN] env.close() failed: {exc}", file=sys.stderr, flush=True)


if __name__ == "__main__":
    asyncio.run(main())