File size: 3,994 Bytes
import asyncio
import os
from typing import List, Optional
from openai import OpenAI
from client import TaskmanagerEnv
from models import TaskmanagerAction
from grader import compute_score # ✅ GRADER USED
# ================= CONFIG =================
# Credentials and endpoint for the OpenAI-compatible client; each value can be
# overridden through the environment variable of the same name.
API_KEY = os.getenv("API_KEY", "dummy")
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:4000")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Value reported as ``env=`` on every [START] log line.
BENCHMARK = "taskmanager"
# Upper bound on the number of environment steps taken per task.
MAX_STEPS = 20
# A task counts as successful when its score is at least this value.
SUCCESS_SCORE_THRESHOLD = 0.6
# ================= LOGGING =================
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line that opens the log of one task run.

    Args:
        task: Name of the task being started.
        env: Name of the benchmark/environment.
        model: Name of the model in use.
    """
    # flush=True so the line is emitted immediately rather than sitting in
    # the stdout buffer.
    print(f"[START] task={task} env={env} model={model}", flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` line describing a single environment step.

    Args:
        step: 1-based index of the step within the current task.
        action: Text describing the action that was taken.
        reward: Reward for this step; printed with two decimals.
        done: Whether the episode finished; printed as ``true``/``false``.
        error: Error text, or ``None``/empty to print the literal ``null``.
    """
    # Any falsy error (None or "") is rendered as the literal "null".
    error_val = error or "null"
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}",
        flush=True,
    )
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` line that summarises one task run.

    Args:
        success: Whether the run counted as a success; printed as
            ``true``/``false``.
        steps: Number of steps that were taken.
        score: Final score; printed with three decimals.
        rewards: Per-step rewards; printed comma-separated with two decimals
            each (an empty list prints an empty ``rewards=`` field).
    """
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(
        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
        flush=True,
    )
# ================= SMART POLICY =================
# Rank of each known ticket type; unknown types rank lowest (0).
_TICKET_TYPE_RANK = {"bug": 3, "feature": 2, "enhancement": 1}


def _ticket_rank(ticket):
    """Return the sort key of *ticket*: (type rank, priority, -deadline)."""
    return (
        _TICKET_TYPE_RANK.get(ticket["type"], 0),
        ticket["priority"],
        # Negated so that a smaller deadline compares as "better".
        -ticket["deadline"],
    )


def choose_best_ticket(tickets):
    """Pick the id of the ticket that should be handled next.

    Tickets are compared by type (bug > feature > enhancement > anything
    else), then by higher ``priority``, then by smaller ``deadline``. When
    several tickets tie, the one that appears first in *tickets* wins.

    Args:
        tickets: Sequence of mappings with the keys ``"id"``, ``"type"``,
            ``"priority"`` and ``"deadline"``; may be empty or ``None``.

    Returns:
        The ``"id"`` of the best ticket, or ``None`` when there are no
        tickets.

    Raises:
        KeyError: If a ticket lacks one of the required keys.
    """
    if not tickets:
        return None
    # max() is a single O(n) pass and, like the stable descending sort it
    # replaces, returns the first of several equally-ranked tickets.
    return max(tickets, key=_ticket_rank)["id"]
# ================= MAIN =================
def _ping_model(client) -> None:
    """Send one single-token chat request to ``MODEL_NAME`` and drop the reply."""
    client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": "hello"}],
        max_tokens=1,
    )


def _grade_episode(rewards: List[float]) -> float:
    """Convert the rewards collected in one episode into a grader score."""
    total_reward = sum(rewards)
    # NOTE(review): 15 is presumably the largest reward a single step can
    # yield -- confirm against the environment.
    max_per_step = 15
    max_possible = len(rewards) * max_per_step
    try:
        return compute_score(total_reward, max_possible)
    except ZeroDivisionError:
        # An episode without any step gives max_possible == 0; treat that
        # case as a zero score instead of crashing. Any other division
        # error is a genuine grader problem and is re-raised.
        if max_possible:
            raise
        return 0.0


async def main() -> None:
    """Run every benchmark task against the environment and log the results.

    For each task a ``[START]`` line is printed, the environment is reset and
    stepped (at most ``MAX_STEPS`` times) using ``choose_best_ticket`` to pick
    the action, and an ``[END]`` line with the graded score is printed. The
    ``[END]`` line is emitted even when the episode fails with an exception;
    the exception then propagates after the environment has been closed.
    """
    # Local import: only needed for the best-effort diagnostics below.
    import sys

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url="http://localhost:8000")
    loop = asyncio.get_running_loop()
    try:
        tasks_to_run = ["task-1-easy", "task-2-medium", "task-3-hard"]
        for task_name in tasks_to_run:
            rewards: List[float] = []
            steps_taken = 0
            success = False
            score = 0.0
            log_start(task_name, BENCHMARK, MODEL_NAME)
            try:
                # NOTE(review): the task name is only used for logging;
                # reset() is called without it, so all tasks appear to run
                # with the same environment setup -- confirm this is intended.
                result = await env.reset()
                obs = result.observation

                # Best-effort request to the model endpoint. The client call
                # is synchronous, so it runs in the default executor to keep
                # the event loop free while the request is in flight.
                try:
                    await loop.run_in_executor(None, _ping_model, client)
                except Exception as exc:
                    # The policy below does not depend on the model, so a
                    # failed request is reported and otherwise ignored.
                    print(f"model request failed: {exc!r}", file=sys.stderr, flush=True)

                for step in range(1, MAX_STEPS + 1):
                    if result.done:
                        break
                    ticket_id = choose_best_ticket(obs.tasks)
                    if ticket_id is None:
                        # Nothing left to work on.
                        break

                    result = await env.step(TaskmanagerAction(task_id=ticket_id))
                    obs = result.observation
                    # A missing (None) reward is counted as 0.0.
                    reward = result.reward or 0.0
                    done = result.done
                    rewards.append(reward)
                    steps_taken = step
                    log_step(
                        step=step,
                        action=f"resolve_ticket_{ticket_id}",
                        reward=reward,
                        done=done,
                        error=None,
                    )
                    if done:
                        break

                score = _grade_episode(rewards)
                success = score >= SUCCESS_SCORE_THRESHOLD
            finally:
                # Always close the task's log with an [END] line, even when
                # reset(), step() or grading raised.
                log_end(success, steps_taken, score, rewards)
    finally:
        try:
            await env.close()
        except Exception as exc:
            # Cleanup is best effort; report the failure without masking
            # an exception that may already be propagating.
            print(f"env.close() failed: {exc!r}", file=sys.stderr, flush=True)
if __name__ == "__main__":
    # Run the async entry point only when this file is executed as a script.
    asyncio.run(main())
|