# File size: 4,734 Bytes
# 2f684d2
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
"""
AI Ticket Prioritization Environment (Jira-like)
Simulates a real-world engineering workflow:
- Bug fixes (critical)
- Feature development
- UI enhancements
Agent must prioritize tickets to maximize business impact and avoid SLA violations.
"""
from dataclasses import dataclass
from uuid import uuid4
import random
# Prefer the real OpenEnv base types. When the package cannot be imported,
# fall back to minimal local stand-ins so this module can still be loaded.
try:
    from openenv.core.env_server.interfaces import Environment
    from openenv.core.env_server.types import State
except ImportError:

    class Environment:
        # Empty placeholder; only serves as a base class for the environment
        # defined in this file when openenv is unavailable.
        pass

    @dataclass
    class State:
        # Stand-in holding just the two fields this module reads and writes.
        episode_id: str  # set from str(uuid4()) when an episode starts
        step_count: int  # incremented once per step() call
try:
from ..models import TaskmanagerAction, TaskmanagerObservation
except ImportError:
from models import TaskmanagerAction, TaskmanagerObservation
class TaskmanagerEnvironment(Environment):
    """Jira-like ticket prioritization environment.

    Each episode presents a shuffled list of tickets (dicts with ``id``,
    ``deadline``, ``priority``, ``effort`` and ``type``).  On every step the
    agent names one ticket id; working on it advances the clock by the
    ticket's effort and yields a reward that depends on whether the ticket
    finished before its deadline, on its priority and on its type.

    The number of tickets per episode grows with the average reward earned
    in previously played episodes (5, 8 or 12 tickets).
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Reward multiplier per ticket type; any other type (e.g. "enhancement")
    # uses a multiplier of 1.
    _TYPE_MULTIPLIER = {"bug": 2, "feature": 1.5}

    def __init__(self):
        """Create an environment with no tickets and zeroed statistics."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0
        self.tickets = []
        self.max_steps = 20
        # Running totals across episodes; they drive the curriculum in reset().
        self.total_reward = 0
        self.episode_count = 0

    # ================= TICKET GENERATOR =================
    def generate_tickets(self, num_tickets):
        """Return ``num_tickets`` random tickets in shuffled order.

        Deadlines are built along a virtual schedule in which the tickets are
        processed in creation order: each deadline is the virtual start time
        plus the ticket's effort plus 3-8 units of slack, so processing the
        tickets in creation order meets every deadline.  The list is shuffled
        afterwards so that order is not handed to the agent.

        Args:
            num_tickets: Number of tickets to create; ids run from 1 upward.

        Returns:
            A list of ticket dicts with keys ``id``, ``deadline``,
            ``priority``, ``effort`` and ``type``.
        """
        tickets = []
        schedule_time = 0
        for index in range(num_tickets):
            ticket_type = random.choice(["bug", "feature", "enhancement"])
            effort = random.randint(1, 3)
            # Slack on top of the effort keeps the deadline feasible.
            slack = random.randint(3, 8)
            deadline = schedule_time + effort + slack
            priority = random.randint(1, 5)
            tickets.append(
                {
                    "id": index + 1,
                    "deadline": deadline,
                    "priority": priority,
                    "effort": effort,
                    "type": ticket_type,
                }
            )
            # Advance the virtual clock so the sequence stays solvable.
            schedule_time += effort
        # Shuffle so the agent has to work out a good order itself.
        random.shuffle(tickets)
        return tickets

    # ================= RESET =================
    def reset(self) -> TaskmanagerObservation:
        """Start a new episode and return its initial observation.

        The ticket count is chosen from the average reward per previously
        started episode: below 5 -> 5 tickets, below 15 -> 8, otherwise 12.

        Returns:
            Observation with the fresh ticket list, time 0, the full step
            budget, zero reward and ``done=False``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0

        # Average over the episodes played so far.  This must be computed
        # before counting the episode that is starting now; otherwise the
        # total is divided by one episode too many and the curriculum lags.
        avg_reward = self.total_reward / max(1, self.episode_count)
        self.episode_count += 1

        if avg_reward < 5:
            num_tickets = 5
        elif avg_reward < 15:
            num_tickets = 8
        else:
            num_tickets = 12
        self.tickets = self.generate_tickets(num_tickets)
        print(
            f"Episode {self.episode_count} | Tickets: {num_tickets} | Avg reward: {avg_reward:.2f}"
        )
        return TaskmanagerObservation(
            tasks=self.tickets,  # keep the 'tasks' field name for compatibility
            current_time=self.current_time,
            steps_left=self.max_steps,
            reward=0.0,
            done=False,
        )

    # ================= STEP =================
    def step(self, action: TaskmanagerAction) -> TaskmanagerObservation:
        """Work on the ticket named by ``action.task_id``.

        Reward rules:
            * Unknown ticket id: -1, nothing else changes.
            * Finished on or before the deadline: ``priority * 3``.
            * Finished late: ``max(-2, priority - 0.5 * delay)``.
            * The result is then scaled by the type multiplier
              (bug x2, feature x1.5, anything else x1).

        Args:
            action: Action whose ``task_id`` selects a ticket by its ``id``.

        Returns:
            Observation with the remaining tickets, the updated clock, the
            remaining step budget (never below 0), the step reward and the
            ``done`` flag (no tickets left or step budget used up).
        """
        self._state.step_count += 1
        ticket = next((t for t in self.tickets if t["id"] == action.task_id), None)

        if ticket:
            self.current_time += ticket["effort"]
            if self.current_time <= ticket["deadline"]:
                # Boosted reward for finishing on time.
                reward = ticket["priority"] * 3
            else:
                delay = self.current_time - ticket["deadline"]
                # Soft penalty: still positive when only slightly late,
                # floored at -2 before the type multiplier is applied.
                reward = max(-2, ticket["priority"] - (delay * 0.5))
            reward *= self._TYPE_MULTIPLIER.get(ticket["type"], 1)
            # Rebind to a new list so lists handed out in earlier
            # observations are not mutated.
            self.tickets = [t for t in self.tickets if t["id"] != action.task_id]
        else:
            reward = -1

        self.total_reward += reward
        done = not self.tickets or self._state.step_count >= self.max_steps
        return TaskmanagerObservation(
            tasks=self.tickets,
            current_time=self.current_time,
            # Clamp so extra calls after the budget is spent never report a
            # negative number of remaining steps.
            steps_left=max(0, self.max_steps - self._state.step_count),
            reward=reward,
            done=done,
            metadata={
                "step": self._state.step_count,
                "remaining_tickets": len(self.tickets),
            },
        )

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state
|