# File size: 4,734 Bytes
# 2f684d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

"""
AI Ticket Prioritization Environment (Jira-like)

Simulates a real-world engineering workflow:
- Bug fixes (critical)
- Feature development
- UI enhancements

Agent must prioritize tickets to maximize business impact and avoid SLA violations.
"""

from dataclasses import dataclass
from uuid import uuid4
import random

# Prefer the real OpenEnv base class and state type; if the package cannot be
# imported, fall back to the minimal local stand-ins defined below so this
# module can still be imported.
try:
    from openenv.core.env_server.interfaces import Environment
    from openenv.core.env_server.types import State
except ImportError:

    # Fallback base class: carries no behavior, it only provides a name for
    # the environment class in this module to inherit from.
    class Environment:
        pass

    # Fallback state record with the two fields this module passes when it
    # constructs a State.
    @dataclass
    class State:
        episode_id: str  # unique identifier of the current episode
        step_count: int  # number of steps taken in the current episode


try:
    from ..models import TaskmanagerAction, TaskmanagerObservation
except ImportError:
    from models import TaskmanagerAction, TaskmanagerObservation


class TaskmanagerEnvironment(Environment):
    """Jira-like ticket prioritization environment.

    Each episode holds a list of tickets (dicts with ``id``, ``deadline``,
    ``priority``, ``effort`` and ``type``). On every step the agent picks a
    ticket id; working on it advances ``current_time`` by the ticket's effort
    and yields a reward that depends on priority, ticket type, and whether
    the ticket was finished by its deadline.

    The number of tickets per episode grows with the average reward earned
    over previously completed episodes.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Reward multiplier per ticket type. Any type not listed here is treated
    # like an enhancement (multiplier 1).
    _TYPE_MULTIPLIERS = {"bug": 2, "feature": 1.5, "enhancement": 1}

    def __init__(self):
        """Create an environment with no tickets; call ``reset()`` to start."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0
        self.tickets = []
        self.max_steps = 20

        # Running totals across episodes, used to pick the difficulty.
        self.total_reward = 0.0
        self.episode_count = 0

    # ================= TICKET GENERATOR =================

    def generate_tickets(self, num_tickets, rng=None):
        """Generate ``num_tickets`` random tickets in shuffled order.

        Deadlines are built so that processing the tickets in generation
        order (before shuffling) meets every deadline: each deadline is the
        cumulative effort of the preceding tickets plus the ticket's own
        effort plus a random slack.

        Args:
            num_tickets: Number of tickets to create.
            rng: Optional random source exposing ``choice``, ``randint`` and
                ``shuffle`` (for example a ``random.Random`` instance).
                Defaults to the module-level ``random`` generator.

        Returns:
            A list of ticket dicts with keys ``id``, ``deadline``,
            ``priority``, ``effort`` and ``type``.
        """
        source = random if rng is None else rng

        tickets = []
        scheduled_time = 0

        for ticket_id in range(1, num_tickets + 1):
            # Draw order is kept fixed so seeded generators stay reproducible.
            ticket_type = source.choice(["bug", "feature", "enhancement"])
            effort = source.randint(1, 3)
            slack = source.randint(3, 8)
            priority = source.randint(1, 5)

            tickets.append(
                {
                    "id": ticket_id,
                    # Feasible if tickets are handled in generation order.
                    "deadline": scheduled_time + effort + slack,
                    "priority": priority,
                    "effort": effort,
                    "type": ticket_type,
                }
            )

            # Advance the reference schedule so the sequence stays solvable.
            scheduled_time += effort

        # Shuffle so the agent cannot simply follow the list order.
        source.shuffle(tickets)

        return tickets

    # ================= RESET =================

    def reset(self) -> TaskmanagerObservation:
        """Start a new episode and return its initial observation.

        The ticket count is chosen from the average reward per completed
        episode: 5 tickets below an average of 5, 8 below 15, otherwise 12.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0

        # Average over episodes that have already been played. The episode
        # being started has earned nothing yet and must not dilute the mean,
        # so the counter is incremented only after the division.
        avg_reward = self.total_reward / max(1, self.episode_count)
        self.episode_count += 1

        if avg_reward < 5:
            num_tickets = 5
        elif avg_reward < 15:
            num_tickets = 8
        else:
            num_tickets = 12

        self.tickets = self.generate_tickets(num_tickets)

        print(
            f"Episode {self.episode_count} | Tickets: {num_tickets} | Avg reward: {avg_reward:.2f}"
        )

        return TaskmanagerObservation(
            # The field is named 'tasks' for compatibility with the model.
            tasks=self._ticket_snapshot(),
            current_time=self.current_time,
            steps_left=self.max_steps,
            reward=0.0,
            done=False,
        )

    # ================= STEP =================

    def step(self, action: TaskmanagerAction) -> TaskmanagerObservation:
        """Work on the ticket named by ``action.task_id``.

        A known ticket advances ``current_time`` by its effort, is scored,
        and is removed from the backlog. An unknown id costs a reward of -1
        and leaves time and backlog untouched. The episode is done when no
        tickets remain or ``max_steps`` steps have been taken.

        Returns:
            The observation after the step, with a float ``reward``.
        """
        self._state.step_count += 1

        ticket = next((t for t in self.tickets if t["id"] == action.task_id), None)

        if ticket is None:
            # Invalid or already-completed ticket id.
            reward = -1.0
        else:
            self.current_time += ticket["effort"]
            reward = self._ticket_reward(ticket, self.current_time)
            self.tickets = [t for t in self.tickets if t["id"] != action.task_id]

        self.total_reward += reward

        done = not self.tickets or self._state.step_count >= self.max_steps

        return TaskmanagerObservation(
            tasks=self._ticket_snapshot(),
            current_time=self.current_time,
            # Never report a negative budget if stepped past the limit.
            steps_left=max(0, self.max_steps - self._state.step_count),
            reward=reward,
            done=done,
            metadata={
                "step": self._state.step_count,
                "remaining_tickets": len(self.tickets),
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state

    # ================= HELPERS =================

    def _ticket_snapshot(self):
        """Return copies of the open tickets so callers cannot mutate env state."""
        return [dict(t) for t in self.tickets]

    @classmethod
    def _ticket_reward(cls, ticket, finish_time) -> float:
        """Score a ticket completed at ``finish_time``.

        On time: ``priority * 3``. Late: ``priority - 0.5 * delay``, floored
        at -2. The result is then scaled by the ticket-type multiplier.
        """
        if finish_time <= ticket["deadline"]:
            base = ticket["priority"] * 3
        else:
            delay = finish_time - ticket["deadline"]
            # Soft penalty: can still be positive when only slightly late.
            base = max(-2, ticket["priority"] - (delay * 0.5))

        return float(base * cls._TYPE_MULTIPLIERS.get(ticket["type"], 1))