# File size: 3,994 Bytes
# 2f684d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import asyncio
import os
from typing import List, Optional

from openai import OpenAI

from client import TaskmanagerEnv
from models import TaskmanagerAction
from grader import compute_score  # ✅ GRADER USED


# ================= CONFIG =================

# Settings passed to the OpenAI client; each falls back to a local default
# when the corresponding environment variable is unset.
API_KEY = os.getenv("API_KEY", "dummy")
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:4000")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Fixed run parameters.
BENCHMARK = "taskmanager"  # reported as ``env`` in the [START] line
MAX_STEPS = 20  # maximum number of steps attempted per task
SUCCESS_SCORE_THRESHOLD = 0.6  # a score at or above this counts as success

# ================= LOGGING =================


def log_start(task: str, env: str, model: str):
    """Print the ``[START]`` line naming the task, environment and model.

    The line is flushed immediately so it appears in order with later output.
    """
    fields = (f"task={task}", f"env={env}", f"model={model}")
    print("[START] " + " ".join(fields), flush=True)


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]):
    """Print one ``[STEP]`` line describing a single environment step.

    Args:
        step: Step number within the current task.
        action: Label of the action that was taken.
        reward: Reward for the step; printed with two decimals.
        done: Episode-finished flag; printed lowercased (``true``/``false``).
        error: Error text, or a falsy value, which is printed as ``null``.
    """
    shown_error = error or "null"
    done_text = str(done).lower()
    fields = (
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={done_text}",
        f"error={shown_error}",
    )
    print("[STEP] " + " ".join(fields), flush=True)


def log_end(success: bool, steps: int, score: float, rewards: List[float]):
    """Print the ``[END]`` summary line for a finished task.

    Args:
        success: Outcome flag; printed lowercased (``true``/``false``).
        steps: Number of steps taken.
        score: Final score; printed with three decimals.
        rewards: Per-step rewards; printed comma-separated with two decimals
            each (an empty list prints nothing after ``rewards=``).
    """
    formatted_rewards = [format(value, ".2f") for value in rewards]
    fields = (
        f"success={str(success).lower()}",
        f"steps={steps}",
        f"score={score:.3f}",
        "rewards=" + ",".join(formatted_rewards),
    )
    print("[END] " + " ".join(fields), flush=True)


# ================= SMART POLICY =================


# Ranking weight per ticket type; any type not listed here ranks lowest (0).
_TICKET_TYPE_RANK = {"bug": 3, "feature": 2, "enhancement": 1}


def _ticket_rank(ticket):
    """Return the ranking key: (type weight, priority, negated deadline)."""
    return (
        _TICKET_TYPE_RANK.get(ticket["type"], 0),
        ticket["priority"],
        -ticket["deadline"],  # negated so a smaller deadline ranks higher
    )


def choose_best_ticket(tickets):
    """Return the ``id`` of the highest-ranked ticket, or ``None`` if none exist.

    Tickets are ranked by type weight first (bug > feature > enhancement >
    anything else), then by higher ``priority``, then by smaller ``deadline``.
    When several tickets rank equally, the one that appears first wins.

    Args:
        tickets: Collection of mappings with the keys ``"id"``, ``"type"``,
            ``"priority"`` and ``"deadline"``. May be empty or ``None``.

    Returns:
        The ``"id"`` value of the best ticket, or ``None`` when ``tickets``
        is empty or falsy.

    Raises:
        KeyError: If a ticket lacks one of the keys used for ranking.
    """
    if not tickets:
        return None

    # A single pass with max() is enough; there is no need to sort the whole
    # list just to read its first element.
    return max(tickets, key=_ticket_rank)["id"]


# ================= MAIN =================


async def main():
    """Run every benchmark task against the environment and log the results.

    For each task name the environment is reset and tickets are resolved one
    at a time, picked by ``choose_best_ticket``, until the episode reports
    ``done``, no ticket can be chosen, or ``MAX_STEPS`` steps have run. The
    collected rewards are then scored with ``compute_score``.

    Per task, one ``[START]`` line, one ``[STEP]`` line per action and one
    ``[END]`` line are printed. The ``[END]`` line is printed even if the
    episode raises (with the values gathered so far, and ``score=0.000`` /
    ``success=false`` if scoring was not reached), so every ``[START]`` has a
    matching ``[END]``. The exception still propagates afterwards, and the
    environment is always closed on the way out.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url="http://localhost:8000")

    tasks_to_run = ["task-1-easy", "task-2-medium", "task-3-hard"]
    # Per-step reward ceiling assumed when computing the maximum possible total.
    max_per_step = 15

    try:
        for task_name in tasks_to_run:
            rewards: List[float] = []
            steps_taken = 0
            success = False
            score = 0.0

            log_start(task_name, BENCHMARK, MODEL_NAME)

            try:
                # NOTE(review): reset() is called without the task name, so the
                # name is only used for logging here — confirm that the
                # environment does not need it to select a difficulty.
                result = await env.reset()
                obs = result.observation

                # One minimal request to the configured model endpoint. The
                # response is discarded and any failure is ignored on purpose,
                # so an unreachable endpoint does not stop the run.
                # NOTE(review): this is a synchronous client call inside a
                # coroutine; it holds the event loop until it returns.
                try:
                    client.chat.completions.create(
                        model=MODEL_NAME,
                        messages=[{"role": "user", "content": "hello"}],
                        max_tokens=1,
                    )
                except Exception:
                    pass

                for step in range(1, MAX_STEPS + 1):
                    if result.done:
                        break

                    ticket_id = choose_best_ticket(obs.tasks)
                    if ticket_id is None:
                        break

                    result = await env.step(TaskmanagerAction(task_id=ticket_id))
                    obs = result.observation

                    # A missing (None) reward is treated as zero.
                    reward = result.reward or 0.0
                    done = result.done

                    rewards.append(reward)
                    steps_taken = step

                    log_step(
                        step=step,
                        action=f"resolve_ticket_{ticket_id}",
                        reward=reward,
                        done=done,
                        error=None,
                    )

                    if done:
                        break

                # Score the episode against the best total assumed reachable
                # in the number of steps actually taken.
                total_reward = sum(rewards)
                max_possible = len(rewards) * max_per_step
                score = compute_score(total_reward, max_possible)
                success = score >= SUCCESS_SCORE_THRESHOLD
            finally:
                # Always close the task's log record, including on failure.
                log_end(success, steps_taken, score, rewards)

    finally:
        # Best-effort cleanup: a failure while closing must not mask the
        # outcome of the run itself.
        try:
            await env.close()
        except Exception:
            pass


# Script entry point: when the file is executed directly, run the async
# main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())