Vittal-M committed on
Commit
5a735ce
·
verified ·
1 Parent(s): a4606d9

Upload environment.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. environment.py +721 -0
environment.py ADDED
@@ -0,0 +1,721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Core Scheduling Optimisation Environment implementing the OpenEnv API contract.
2
+
3
+ Design principles
4
+ -----------------
5
+ * reset() always returns a valid Observation — never raises.
6
+ * step() clamps reward to [0.0, 1.0] unconditionally.
7
+ * Task-aware instance routing: conflict_classification and schedule_repair
8
+ are shown only infeasible instances; feasibility_check sees all 12.
9
+ * Per-step contextual feedback: the context string and info['grading_breakdown']
10
+ give the agent actionable signal on every step, enabling sample-efficient
11
+ multi-step improvement within a single episode.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import copy
17
+ import json
18
+ from typing import Any
19
+
20
+ from graders.grader_classification import ConflictGrader
21
+ from graders.grader_detection import FeasibilityGrader
22
+ from graders.grader_fix import RepairGrader
23
+ from models import Action, Observation
24
+
25
# One grader object per task id. The instances are created once at import
# time and the same objects are reused by every episode.
_GRADERS: dict[str, Any] = dict(
    feasibility_check=FeasibilityGrader(),
    conflict_classification=ConflictGrader(),
    schedule_repair=RepairGrader(),
)
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Scheduling instance bank — 12 diverse instances.
34
+ #
35
+ # Each entry:
36
+ # instance – dict exposed to the agent (jobs + machines + proposed_schedule)
37
+ # is_feasible – bool, ground-truth for Task 1
38
+ # violation_type – str | None, ground-truth for Task 2
39
+ # optimal_schedule – dict, the repaired schedule for Task 3
40
+ # optimal_makespan – int, minimum achievable makespan
41
+ # description – one-line human-readable summary
42
+ # ---------------------------------------------------------------------------
43
+
44
# Ground-truth bank of scheduling problems.  Every entry pairs the instance
# shown to the agent with the answers the graders compare against.  For each
# entry, the latest finish time in ``optimal_schedule`` equals
# ``optimal_makespan``.
INSTANCE_BANK: list[dict[str, Any]] = [
    # ------------------------------------------------------------------
    # 0 — resource_overload
    # J1[0,4) and J2[2,5) overlap on M1 (capacity=1).
    # Fix: move J2 to M2, where it starts right after J3[0,2) finishes.
    # Makespan 5 is the lower bound: 9 work units on two unit-capacity
    # machines cannot finish before ceil(9 / 2) = 5.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P01",
            "jobs": [
                {"id": "J1", "duration": 4, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 2},
                    {"job_id": "J3", "machine_id": "M2", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "resource_overload",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 2},
                {"job_id": "J3", "machine_id": "M2", "start_time": 0},
            ]
        },
        "optimal_makespan": 5,
        "description": "J1[0,4) and J2[2,5) overlap on M1 (capacity=1) → resource_overload.",
    },
    # ------------------------------------------------------------------
    # 1 — deadline_violation
    # J1 starts late (t=5, dur=5), finishes at t=10 > deadline=8.
    # Fix: schedule J1 first so it finishes at t=5 ≤ 8.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P02",
            "jobs": [
                {"id": "J1", "duration": 5, "deadline": 8, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 5},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "deadline_violation",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M1", "start_time": 5},
            ]
        },
        "optimal_makespan": 8,
        "description": "J1 starts at t=5 and finishes at t=10, violating deadline=8.",
    },
    # ------------------------------------------------------------------
    # 2 — precedence_violation
    # J2 depends on J1 (J1 finishes t=8) but J2 starts at t=0.
    # Fix: start J1 first, then J2 after J1 completes.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P03",
            "jobs": [
                {"id": "J1", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 20, "dependencies": ["J1"], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 5},
                    {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "precedence_violation",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 3},
            ]
        },
        "optimal_makespan": 6,
        "description": "J2 depends on J1; J2 starts at t=0 but J1 does not finish until t=8.",
    },
    # ------------------------------------------------------------------
    # 3 — availability_conflict
    # M1 available [8,18]. J1 starts at t=5, before the window opens.
    # Fix: shift J1 to start at t=8 (first valid slot).
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P04",
            "jobs": [
                {"id": "J1", "duration": 4, "deadline": 24, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 24, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 8, "available_end": 18},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 5},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 9},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "availability_conflict",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 8},
                {"job_id": "J2", "machine_id": "M1", "start_time": 12},
            ]
        },
        "optimal_makespan": 15,
        "description": "J1 starts at t=5, before M1's available window [8,18] → availability_conflict.",
    },
    # ------------------------------------------------------------------
    # 4 — capacity_exceeded
    # 3 jobs on M1 simultaneously; capacity=2 → load=3 > 2.
    # Fix: stagger one job to start after the first batch finishes.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P05",
            "jobs": [
                {"id": "J1", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 2, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J3", "machine_id": "M1", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "capacity_exceeded",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                {"job_id": "J3", "machine_id": "M1", "start_time": 3},
            ]
        },
        "optimal_makespan": 6,
        "description": "3 jobs start simultaneously on M1 (capacity=2); concurrent load=3 > 2.",
    },
    # ------------------------------------------------------------------
    # 5 — resource_overload (variant)
    # J1[0,5) and J2[1,5) overlap on M1 (capacity=1).
    # Fix: run jobs sequentially.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P06",
            "jobs": [
                {"id": "J1", "duration": 5, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 4, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 1},
                    {"job_id": "J3", "machine_id": "M1", "start_time": 8},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "resource_overload",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M1", "start_time": 5},
                {"job_id": "J3", "machine_id": "M1", "start_time": 9},
            ]
        },
        "optimal_makespan": 11,
        "description": "J1[0,5) and J2[1,5) overlap on M1 (capacity=1) → resource_overload.",
    },
    # ------------------------------------------------------------------
    # 6 — deadline_violation (chain with avoidable idle time)
    # J1→J2→J3 chain. J1 starts at t=3 (wasted idle), making the chain
    # finish at t=15 > deadline=13. Fix: start J1 at t=0 → chain ends at
    # t=12 ≤ 13. NOTE: J3 duration is 3 (not 4) so the chain IS solvable.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P07",
            "jobs": [
                {"id": "J1", "duration": 4, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 5, "deadline": 20, "dependencies": ["J1"], "resource_req": 1},
                {"id": "J3", "duration": 3, "deadline": 13, "dependencies": ["J2"], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M3", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 3},
                    {"job_id": "J2", "machine_id": "M2", "start_time": 7},
                    {"job_id": "J3", "machine_id": "M3", "start_time": 12},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "deadline_violation",
        # Optimal: eliminate idle prefix → J1 starts at t=0, chain finishes at t=12 ≤ 13
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 4},
                {"job_id": "J3", "machine_id": "M3", "start_time": 9},
            ]
        },
        "optimal_makespan": 12,
        "description": "J1 starts at t=3 (unnecessary idle); J3 finishes at t=15 > deadline=13.",
    },
    # ------------------------------------------------------------------
    # 7 — precedence_violation (fan-in: two predecessors)
    # J3 depends on J1 and J2; J3 starts at t=2 but J2 finishes at t=4.
    # Fix: delay J3 start to t=4 (max predecessor finish time).
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P08",
            "jobs": [
                {"id": "J1", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 4, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 2, "deadline": 20, "dependencies": ["J1", "J2"], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M3", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                    {"job_id": "J3", "machine_id": "M3", "start_time": 2},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "precedence_violation",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                {"job_id": "J3", "machine_id": "M3", "start_time": 4},
            ]
        },
        "optimal_makespan": 6,
        "description": "J3 depends on J1 and J2; J3 starts at t=2 but J2 does not finish until t=4.",
    },
    # ------------------------------------------------------------------
    # 8 — availability_conflict (maintenance window)
    # M1 available only [0,10]. J1 starts at t=9, runs [9,12) → exceeds
    # the window. Fix: schedule J1 before the window closes.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P09",
            "jobs": [
                {"id": "J1", "duration": 3, "deadline": 24, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 2, "deadline": 24, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {
                    "id": "M1",
                    "capacity": 1,
                    "available_start": 0,
                    "available_end": 10,
                    "note": "M1 under maintenance t=[10,15]; use window [0,10] only.",
                },
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 9},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "availability_conflict",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                {"job_id": "J1", "machine_id": "M1", "start_time": 2},
            ]
        },
        "optimal_makespan": 5,
        "description": "J1 starts at t=9, extends into maintenance window [10,15] → availability_conflict.",
    },
    # ------------------------------------------------------------------
    # 9 — capacity_exceeded (four jobs on capacity-3 machine)
    # Concurrent load at t=0 is 4 > capacity=3.
    # Fix: stagger the fourth job.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P10",
            "jobs": [
                {"id": "J1", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J4", "duration": 2, "deadline": 20, "dependencies": [], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 3, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J3", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J4", "machine_id": "M1", "start_time": 0},
                ]
            },
        },
        "is_feasible": False,
        "violation_type": "capacity_exceeded",
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M1", "start_time": 0},
                {"job_id": "J3", "machine_id": "M1", "start_time": 0},
                {"job_id": "J4", "machine_id": "M1", "start_time": 2},
            ]
        },
        "optimal_makespan": 4,
        "description": "4 jobs start simultaneously on M1 (capacity=3); concurrent load=4 > 3.",
    },
    # ------------------------------------------------------------------
    # 10 — FEASIBLE: 3-job, 2-machine
    # All constraints satisfied in the proposed schedule.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P11",
            "jobs": [
                {"id": "J1", "duration": 4, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 3, "deadline": 20, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 5, "deadline": 20, "dependencies": ["J1"], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                    {"job_id": "J3", "machine_id": "M2", "start_time": 4},
                ]
            },
        },
        "is_feasible": True,
        "violation_type": None,
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                {"job_id": "J3", "machine_id": "M2", "start_time": 4},
            ]
        },
        "optimal_makespan": 9,
        "description": "Fully feasible 3-job, 2-machine schedule — all constraints satisfied.",
    },
    # ------------------------------------------------------------------
    # 11 — FEASIBLE: 5-job, 3-machine with fan-in precedence
    # All constraints satisfied in the proposed schedule.
    # ------------------------------------------------------------------
    {
        "instance": {
            "problem_id": "P12",
            "jobs": [
                {"id": "J1", "duration": 3, "deadline": 30, "dependencies": [], "resource_req": 1},
                {"id": "J2", "duration": 2, "deadline": 30, "dependencies": [], "resource_req": 1},
                {"id": "J3", "duration": 4, "deadline": 30, "dependencies": [], "resource_req": 1},
                {"id": "J4", "duration": 3, "deadline": 30, "dependencies": ["J1", "J2"], "resource_req": 1},
                {"id": "J5", "duration": 2, "deadline": 30, "dependencies": ["J3"], "resource_req": 1},
            ],
            "machines": [
                {"id": "M1", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M2", "capacity": 1, "available_start": 0, "available_end": 24},
                {"id": "M3", "capacity": 1, "available_start": 0, "available_end": 24},
            ],
            "proposed_schedule": {
                "assignments": [
                    {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                    {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                    {"job_id": "J3", "machine_id": "M3", "start_time": 0},
                    {"job_id": "J4", "machine_id": "M1", "start_time": 3},
                    {"job_id": "J5", "machine_id": "M3", "start_time": 4},
                ]
            },
        },
        "is_feasible": True,
        "violation_type": None,
        "optimal_schedule": {
            "assignments": [
                {"job_id": "J1", "machine_id": "M1", "start_time": 0},
                {"job_id": "J2", "machine_id": "M2", "start_time": 0},
                {"job_id": "J3", "machine_id": "M3", "start_time": 0},
                {"job_id": "J4", "machine_id": "M1", "start_time": 3},
                {"job_id": "J5", "machine_id": "M3", "start_time": 4},
            ]
        },
        "optimal_makespan": 6,
        "description": "Fully feasible 5-job, 3-machine schedule with fan-in precedence — all constraints satisfied.",
    },
]
490
+
491
+ # ---------------------------------------------------------------------------
492
+ # Task-specific instance pools (built once after INSTANCE_BANK is defined).
493
+ # This ensures task-appropriate instances are shown per task:
494
+ # feasibility_check → all 12 (mix of feasible and infeasible)
495
+ # conflict_classification → 10 infeasible only (feasible has no violation)
496
+ # schedule_repair → 10 infeasible with known optimal repairs
497
+ # ---------------------------------------------------------------------------
498
# Per-task views over INSTANCE_BANK, computed once at import time.  The
# feasibility pool is the bank itself; the other two pools keep only entries
# whose "is_feasible" flag is falsy (the repair pool additionally requires a
# truthy "optimal_schedule").
_TASK_POOLS: dict[str, list[dict[str, Any]]] = {
    "feasibility_check": INSTANCE_BANK,
    "conflict_classification": list(
        filter(lambda entry: not entry["is_feasible"], INSTANCE_BANK)
    ),
    "schedule_repair": list(
        filter(
            lambda entry: not entry["is_feasible"] and entry.get("optimal_schedule"),
            INSTANCE_BANK,
        )
    ),
}
505
+
506
+
507
class SchedulingOptEnv:
    """OpenEnv-compatible scheduling optimisation environment.

    Public API (OpenEnv contract)
    -----------------------------
    reset(task_id: str) → Observation
    step(action: Action) → (Observation, float, bool, dict)
    state() → dict

    An episode is bound to one entry of the module-level instance bank.
    ``reset`` picks the entry (round-robin per task), ``step`` grades an
    action against it, and ``state`` exposes a snapshot of the bookkeeping.
    """

    # A step reward at or above this value ends the episode as a success and
    # suppresses the retry hint.
    _SUCCESS_THRESHOLD: float = 0.95
    # Step budget used for task ids missing from _STEP_LIMITS.
    _DEFAULT_MAX_STEPS: int = 3
    # Longest action text kept per history record.
    _HISTORY_ACTION_CHARS: int = 300

    # Maximum number of steps per episode, keyed by task id.
    _STEP_LIMITS: dict[str, int] = {
        "feasibility_check": 3,
        "conflict_classification": 5,
        "schedule_repair": 8,
    }

    # Task instructions shown to the agent on every non-terminal observation.
    _BASE_CONTEXTS: dict[str, str] = {
        "feasibility_check": (
            "Examine the proposed_schedule against all four constraint categories "
            "(machine capacity, job deadlines, precedence dependencies, machine "
            "availability windows). Reply with exactly 'feasible' if every constraint "
            "is satisfied, or 'infeasible' if any constraint is violated."
        ),
        "conflict_classification": (
            "The proposed_schedule is infeasible. Identify the PRIMARY constraint "
            "violation and reply with exactly one of: resource_overload, "
            "deadline_violation, precedence_violation, availability_conflict, "
            "capacity_exceeded."
        ),
        "schedule_repair": (
            "The proposed_schedule is infeasible. Return ONLY a JSON object with key "
            '"assignments": a list of {"job_id": str, "machine_id": str, '
            '"start_time": int} dicts that resolves ALL violations (capacity, '
            "deadlines, precedence, availability) and minimises total makespan."
        ),
    }
    # Instruction used when the task id has no entry in _BASE_CONTEXTS.
    _FALLBACK_CONTEXT: str = "Analyse the scheduling instance."

    # Text appended to the instructions after a below-threshold step.
    _RETRY_HINTS: dict[str, str] = {
        "feasibility_check": (
            " ← Previous answer was incorrect. "
            "Re-examine all four constraint types carefully."
        ),
        "conflict_classification": (
            " ← Previous classification was wrong. "
            "Check whether jobs share a machine simultaneously (resource/capacity), "
            "miss their deadlines, violate ordering, or run outside availability windows."
        ),
        "schedule_repair": (
            " ← Previous repair had remaining violations. "
            "Ensure no two jobs overlap on a capacity-1 machine, every job "
            "finishes before its deadline, precedence order is respected, and "
            "all jobs run within machine availability windows."
        ),
    }

    def __init__(self) -> None:
        """Create an environment with no active episode (``step`` is a no-op
        until ``reset`` is called)."""
        self._task_id: str = "feasibility_check"
        self._step: int = 0
        self._max_steps: int = self._DEFAULT_MAX_STEPS
        # Per-task episode counters for round-robin cycling within each pool
        self._task_counters: dict[str, int] = {}
        # The instance used in the current episode (set by reset)
        self._current_instance: dict[str, Any] = {}
        self._done: bool = True
        self._history: list[dict[str, Any]] = []
        self._cumulative_reward: float = 0.0

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def reset(self, task_id: str = "feasibility_check") -> Observation:
        """Start a new episode.

        Selects the next instance from the task-appropriate pool in round-robin
        order so that repeated resets present diverse scheduling problems.
        An unknown ``task_id`` (or an empty pool) falls back to the full
        instance bank and the default step budget instead of raising.

        Args:
            task_id: Task to run; also selects the step budget and the pool.

        Returns:
            The initial observation (step 0) for the selected instance.
        """
        self._task_id = task_id
        self._step = 0
        self._done = False
        self._history = []
        self._cumulative_reward = 0.0
        self._max_steps = self._STEP_LIMITS.get(task_id, self._DEFAULT_MAX_STEPS)

        # Task-aware round-robin instance selection.  ``or`` also covers an
        # empty pool, which would otherwise divide by zero below.
        pool = _TASK_POOLS.get(task_id) or INSTANCE_BANK
        idx = self._task_counters.get(task_id, 0) % len(pool)
        self._current_instance = pool[idx]
        self._task_counters[task_id] = idx + 1

        return Observation(
            schedule_instance=json.dumps(self._current_instance["instance"], indent=2),
            task_id=task_id,
            context=self._build_context(task_id, step=0, last_reward=None),
            step_number=0,
        )

    def step(self, action: Action) -> tuple[Observation, float, bool, dict[str, Any]]:
        """Process one agent action.

        Args:
            action: The agent's answer; it is handed to the task's grader.

        Returns:
            ``(observation, reward, done, info)``.  ``reward`` is clamped to
            [0.0, 1.0] (a NaN grade counts as 0.0) and rounded to 4 places.
            When called on a finished episode the state is left untouched and
            ``info`` is ``{"error": "episode_already_done"}``.
        """
        if self._done:
            return (
                Observation(
                    schedule_instance="{}",
                    task_id=self._task_id,
                    context="Episode is over. Call /reset to start a new episode.",
                    step_number=self._step,
                ),
                0.0,
                True,
                {"error": "episode_already_done"},
            )

        self._step += 1

        # Unknown task ids are graded with the feasibility grader.
        grader = _GRADERS.get(self._task_id, _GRADERS["feasibility_check"])
        reward = self._clamp_reward(grader.grade(action, self._current_instance))
        self._cumulative_reward += reward

        # Capture grading breakdown for rich info dict.  Graders are shared
        # singletons, so hand out a shallow copy rather than their own dict.
        raw_breakdown = getattr(grader, "last_breakdown", {})
        breakdown = dict(raw_breakdown) if isinstance(raw_breakdown, dict) else raw_breakdown

        # Record step history (truncate long responses for storage efficiency).
        # A missing or non-string response is stored as text instead of
        # raising after the counters above were already updated.
        response = getattr(action, "response", "")
        response_text = "" if response is None else str(response)
        self._history.append({
            "step": self._step,
            "action": response_text[: self._HISTORY_ACTION_CHARS],
            "reward": round(reward, 4),
        })

        # Termination: max steps exhausted or near-perfect reward
        solved = reward >= self._SUCCESS_THRESHOLD
        done = self._step >= self._max_steps or solved
        self._done = done

        # Build next observation
        if done:
            best = max(h["reward"] for h in self._history)
            ctx = (
                "Episode complete — constraint satisfied."
                if solved
                else f"Max steps reached. Best reward this episode: {best:.2f}."
            )
            obs = Observation(
                schedule_instance="{}",
                task_id=self._task_id,
                context=ctx,
                step_number=self._step,
            )
        else:
            obs = Observation(
                schedule_instance=json.dumps(
                    self._current_instance["instance"], indent=2
                ),
                task_id=self._task_id,
                context=self._build_context(
                    self._task_id, step=self._step, last_reward=reward
                ),
                step_number=self._step,
            )

        info: dict[str, Any] = {
            "step_reward": round(reward, 4),
            "cumulative_reward": round(self._cumulative_reward, 4),
            "steps_remaining": max(0, self._max_steps - self._step),
            "instance_description": self._current_instance.get("description", ""),
            "grading_breakdown": breakdown,
        }
        return obs, round(reward, 4), done, info

    def state(self) -> dict[str, Any]:
        """Return a snapshot of the full internal environment state.

        The history is deep-copied and the counters are copied, so mutating
        the returned dict does not affect the environment.
        """
        return {
            "task_id": self._task_id,
            "step": self._step,
            "max_steps": self._max_steps,
            "done": self._done,
            "cumulative_reward": round(self._cumulative_reward, 4),
            "history": copy.deepcopy(self._history),
            "current_instance_id": (
                self._current_instance.get("instance", {}).get("problem_id", "")
            ),
            "current_instance_feasible": self._current_instance.get("is_feasible"),
            "task_counters": dict(self._task_counters),
            "instance_pool_sizes": {k: len(v) for k, v in _TASK_POOLS.items()},
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _clamp_reward(raw_reward: Any) -> float:
        """Convert a grader result to a float inside [0.0, 1.0].

        NaN is mapped to 0.0.  A plain ``max(0.0, min(1.0, x))`` turns NaN
        into 1.0 because every comparison with NaN is false, which would
        report a broken grade as a perfect score.
        """
        import math  # local import: keeps this block self-contained

        value = float(raw_reward)
        if math.isnan(value):
            return 0.0
        return max(0.0, min(1.0, value))

    @staticmethod
    def _build_context(
        task_id: str, step: int, last_reward: float | None
    ) -> str:
        """Build a context string that adapts to the current step and last reward.

        On the first step (step=0) the plain task description is returned.
        On retry steps (step>0 with a below-threshold ``last_reward``) the
        task's hint is appended to guide the agent toward a better answer.
        Unknown task ids get a generic instruction and no hint.
        """
        ctx = SchedulingOptEnv._BASE_CONTEXTS.get(
            task_id, SchedulingOptEnv._FALLBACK_CONTEXT
        )

        # Add retry hint when the agent is wrong but still has steps remaining
        needs_hint = (
            step > 0
            and last_reward is not None
            and last_reward < SchedulingOptEnv._SUCCESS_THRESHOLD
        )
        if needs_hint:
            ctx += SchedulingOptEnv._RETRY_HINTS.get(task_id, "")

        return ctx

    @staticmethod
    def get_instance_bank() -> list[dict[str, Any]]:
        """Return the full instance bank (all 12 entries).

        The module-level list itself is returned, not a copy, so callers
        should treat it as read-only.
        """
        return INSTANCE_BANK