kunhsiang committed on
Commit
acc387c
·
verified ·
1 Parent(s): 1015cd0

Upload crmp_env.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. crmp_env.py +768 -0
crmp_env.py ADDED
@@ -0,0 +1,768 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
CRMP Environment: Circular Rubber Manufacturing Problem
Two-Line Flowshop with Circular Material Constraints

Data from: Yin et al. (2021) Sustainability, Table 3 & Table 4
Format: processing_time, type1_granulates, type2_strips

Line A: yields materials after each operation
Line B: demands materials before each operation
"""
11
+
12
+ import numpy as np
13
+ from typing import Optional
14
+
15
+
16
NUM_JOBS_A = 8
NUM_MACHINES_A = 6
NUM_JOBS_B = 6
NUM_MACHINES_B = 3

# =================================================================
# Table 3: Line A - (processing_time, yield_granulates, yield_strips)
# Rows: J1-J8, Columns: M1-M6
# =================================================================
_TABLE3 = [
    # J1: M1 M2 M3 M4 M5 M6
    [(115, 63, 15), (21, 20, 13), (10, 15, 5), (173, 147, 37), (12, 11, 6), (52, 39, 20)],
    # J2:
    [(77, 74, 35), (5, 4, 1), (14, 17, 7), (113, 122, 66), (7, 9, 2), (111, 33, 68)],
    # J3:
    [(107, 96, 5), (26, 33, 5), (14, 23, 3), (132, 57, 59), (3, 1, 1), (36, 28, 3)],
    # J4:
    [(93, 140, 54), (23, 32, 13), (11, 14, 2), (169, 141, 76), (14, 22, 4), (107, 91, 64)],
    # J5:
    [(91, 74, 49), (15, 6, 4), (10, 7, 4), (92, 29, 29), (8, 6, 2), (53, 37, 8)],
    # J6:
    [(62, 12, 28), (10, 11, 6), (14, 2, 5), (145, 140, 27), (4, 2, 2), (68, 67, 43)],
    # J7:
    [(77, 28, 38), (17, 19, 5), (11, 5, 5), (165, 107, 8), (5, 6, 2), (50, 68, 15)],
    # J8:
    [(72, 46, 40), (25, 22, 3), (14, 12, 8), (114, 150, 63), (11, 4, 6), (66, 107, 11)],
]

# =================================================================
# Table 4: Line B - (processing_time, demand_granulates, demand_strips)
# Each operation has its own material demand!
# =================================================================
_TABLE4 = [
    # J1B: M1B M2B M3B
    [(51, 134, 42), (21, 76, 18), (84, 98, 103)],
    # J2B:
    [(54, 101, 82), (43, 40, 40), (75, 114, 44)],
    # J3B:
    [(37, 88, 45), (40, 114, 21), (110, 116, 96)],
    # J4B:
    [(71, 75, 37), (19, 71, 24), (85, 288, 55)],
    # J5B:
    [(32, 127, 30), (31, 72, 25), (96, 196, 50)],
    # J6B:
    [(78, 218, 105), (26, 65, 41), (112, 189, 111)],
]


def _split_table(table, num_jobs, num_machines):
    """Split a table of (time, granulates, strips) triples into three arrays.

    Args:
        table: nested list, one row per job and one triple per machine.
        num_jobs: expected number of rows.
        num_machines: expected number of triples per row.

    Returns:
        Three independent float64 arrays of shape (num_jobs, num_machines):
        processing times, granulate amounts and strip amounts.

    Raises:
        ValueError: if the table does not have the expected dimensions.
    """
    data = np.array(table, dtype=np.float64)
    expected = (num_jobs, num_machines, 3)
    if data.shape != expected:
        raise ValueError(f"table has shape {data.shape}, expected {expected}")
    # Copy each slice so the three arrays are contiguous and do not share memory.
    return data[:, :, 0].copy(), data[:, :, 1].copy(), data[:, :, 2].copy()


# Parse into separate arrays
LINE_A_PROC, LINE_A_YIELD_GRAN, LINE_A_YIELD_STRIP = _split_table(
    _TABLE3, NUM_JOBS_A, NUM_MACHINES_A)
LINE_B_PROC, LINE_B_DEMAND_GRAN, LINE_B_DEMAND_STRIP = _split_table(
    _TABLE4, NUM_JOBS_B, NUM_MACHINES_B)
85
+
86
+
87
def verify_data(yield_gran=None, yield_strip=None,
                demand_gran=None, demand_strip=None):
    """Verify material balance: total yield >= total demand.

    Prints one summary line per material type (granulates, strips).

    Args:
        yield_gran: Line A granulate yields; defaults to LINE_A_YIELD_GRAN.
        yield_strip: Line A strip yields; defaults to LINE_A_YIELD_STRIP.
        demand_gran: Line B granulate demands; defaults to LINE_B_DEMAND_GRAN.
        demand_strip: Line B strip demands; defaults to LINE_B_DEMAND_STRIP.

    Returns:
        True when the summed yield covers the summed demand for both
        material types, otherwise False.
    """
    # Module tables are only looked up when the caller did not supply data.
    total_g = np.sum(LINE_A_YIELD_GRAN if yield_gran is None else yield_gran)
    total_s = np.sum(LINE_A_YIELD_STRIP if yield_strip is None else yield_strip)
    demand_g = np.sum(LINE_B_DEMAND_GRAN if demand_gran is None else demand_gran)
    demand_s = np.sum(LINE_B_DEMAND_STRIP if demand_strip is None else demand_strip)
    print(f"Granulates: yield={total_g:.0f}, demand={demand_g:.0f}, surplus={total_g-demand_g:.0f}")
    print(f"Strips: yield={total_s:.0f}, demand={demand_s:.0f}, surplus={total_s-demand_s:.0f}")
    # Convert to a plain Python bool rather than leaking a NumPy scalar.
    return bool(total_g >= demand_g and total_s >= demand_s)
96
+
97
+
98
def simulate_crmp(seq_a, seq_b, proc_a=None, proc_b=None,
                  yield_gran=None, yield_strip=None,
                  demand_gran=None, demand_strip=None):
    """Simulate the two-line permutation flowshop and return its makespan.

    All machines of a line process jobs in the SAME order (permutation
    constraint). Every Line A operation releases its granulate/strip yield
    when it completes; every Line B operation needs its demand to be in the
    shared buffer before it may start and removes it from the buffer.

    Args:
        seq_a: job indices in processing order for Line A.
        seq_b: job indices in processing order for Line B.
        proc_a, proc_b: (jobs, machines) processing times; default to the
            module tables LINE_A_PROC / LINE_B_PROC.
        yield_gran, yield_strip: (jobs, machines) Line A yields; default to
            LINE_A_YIELD_GRAN / LINE_A_YIELD_STRIP.
        demand_gran, demand_strip: (jobs, machines) Line B demands; default
            to LINE_B_DEMAND_GRAN / LINE_B_DEMAND_STRIP.

    Returns:
        dict with "a_end" and "b_end" (completion of the last sequenced job
        on the last machine of each line, 0.0 for an empty sequence) and
        "makespan" (the larger of the two).

    Notes:
        * If the yields can never cover a demand, the operation starts once
          the last yield has been released and the buffer goes negative; no
          error is raised.
        * NOTE(review): Line B operations are visited job by job while the
          yield cursor only moves forward, so an operation whose earliest
          start lies before the start of a previously visited operation can
          use material released in between. Confirm this is intended.
    """
    # Module tables are only looked up for arguments the caller left out.
    proc_a = np.asarray(LINE_A_PROC if proc_a is None else proc_a, dtype=np.float64)
    proc_b = np.asarray(LINE_B_PROC if proc_b is None else proc_b, dtype=np.float64)
    yield_gran = np.asarray(
        LINE_A_YIELD_GRAN if yield_gran is None else yield_gran, dtype=np.float64)
    yield_strip = np.asarray(
        LINE_A_YIELD_STRIP if yield_strip is None else yield_strip, dtype=np.float64)
    demand_gran = np.asarray(
        LINE_B_DEMAND_GRAN if demand_gran is None else demand_gran, dtype=np.float64)
    demand_strip = np.asarray(
        LINE_B_DEMAND_STRIP if demand_strip is None else demand_strip, dtype=np.float64)

    num_machines_a = proc_a.shape[1]
    num_machines_b = proc_b.shape[1]

    # ---- Line A: standard permutation flowshop ----
    # Rows are sequence positions, so the last row is always the last
    # sequenced job even when seq_a does not contain every job.
    a_comp = np.zeros((len(seq_a), num_machines_a))
    yield_time = {}
    for pos, j in enumerate(seq_a):
        for m in range(num_machines_a):
            machine_free = a_comp[pos - 1, m] if pos > 0 else 0.0
            job_ready = a_comp[pos, m - 1] if m > 0 else 0.0
            a_comp[pos, m] = max(machine_free, job_ready) + proc_a[j, m]
            yield_time[(j, m)] = a_comp[pos, m]

    yield_events = sorted(
        (t, yield_gran[j, m], yield_strip[j, m]) for (j, m), t in yield_time.items()
    )
    num_events = len(yield_events)

    # ---- Line B: permutation flowshop with material constraints ----
    b_comp = np.zeros((len(seq_b), num_machines_b))
    buf_g = 0.0
    buf_s = 0.0
    next_event = 0  # index of the first yield not yet added to the buffer

    for pos, j in enumerate(seq_b):
        for m in range(num_machines_b):
            machine_free = b_comp[pos - 1, m] if pos > 0 else 0.0
            job_ready = b_comp[pos, m - 1] if m > 0 else 0.0
            start = max(machine_free, job_ready)
            dg = demand_gran[j, m]
            ds = demand_strip[j, m]

            while True:
                # Bank every yield released up to the candidate start time.
                while next_event < num_events and yield_events[next_event][0] <= start:
                    _, g, s = yield_events[next_event]
                    buf_g += g
                    buf_s += s
                    next_event += 1
                if (buf_g >= dg and buf_s >= ds) or next_event >= num_events:
                    break
                # Not enough material yet: wait for the next release.
                start = yield_events[next_event][0]

            buf_g -= dg
            buf_s -= ds
            b_comp[pos, m] = start + proc_b[j, m]

    a_finish = a_comp[-1, -1] if a_comp.size else 0.0
    b_finish = b_comp[-1, -1] if b_comp.size else 0.0
    return {"makespan": max(a_finish, b_finish),
            "a_end": a_finish,
            "b_end": b_finish}
196
+
197
+
198
def evaluate_sequence(seq_a, seq_b, proc_a=None, proc_b=None):
    """Return only the makespan of a (Line A, Line B) sequence pair.

    Thin wrapper around simulate_crmp; yields and demands are left at that
    function's defaults.
    """
    outcome = simulate_crmp(seq_a, seq_b, proc_a, proc_b)
    return outcome["makespan"]
201
+
202
+
203
def _per_machine_orders(order, num_machines):
    """Normalise a job order to something indexable by machine number.

    A dict is returned unchanged. A sequence whose first element is itself a
    sequence is taken as one job list per machine. Any other sequence (list,
    tuple, range, 1-D array, possibly empty) is treated as a single job order
    shared by all machines.
    """
    if isinstance(order, dict):
        return order
    order = list(order)
    if order and isinstance(order[0], (list, tuple, np.ndarray)):
        return order
    return {m: list(order) for m in range(num_machines)}


def simulate_nonperm(order_a, order_b, proc_a=None, proc_b=None,
                     yield_gran=None, yield_strip=None,
                     demand_gran=None, demand_strip=None):
    """Non-permutation flowshop simulation for CRMP.

    Key difference from the permutation model: each machine can process the
    jobs in a different order. Line B operations are picked one at a time,
    always taking the schedulable operation with the earliest flowshop start.

    Args:
        order_a: dict {machine: [job_order]} or a flat sequence of jobs
            (same order on all machines) for Line A.
        order_b: same for Line B.
        proc_a, proc_b: (jobs, machines) processing times; default to the
            module tables LINE_A_PROC / LINE_B_PROC.
        yield_gran, yield_strip: (jobs, machines) Line A yields; default to
            LINE_A_YIELD_GRAN / LINE_A_YIELD_STRIP.
        demand_gran, demand_strip: (jobs, machines) Line B demands; default
            to LINE_B_DEMAND_GRAN / LINE_B_DEMAND_STRIP.

    Returns:
        dict with "makespan", "a_end" and "b_end", where the line ends are
        the latest completion on the last machine of each line.

    Raises:
        ValueError: a Line A machine is asked to process a job that was not
            scheduled on the previous machine.
        RuntimeError: Line B operations remain but none can be scheduled.

    Notes:
        * If the yields can never cover a demand, the operation starts once
          the last yield has been released and the buffer goes negative.
        * NOTE(review): operations are chosen by earliest flowshop start, not
          by material-feasible start, and the yield cursor only moves
          forward. A later-chosen operation with an earlier start can
          therefore use material released up to the previous operation's
          start. Confirm this is intended.
    """
    # Module tables are only looked up for arguments the caller left out.
    proc_a = np.asarray(LINE_A_PROC if proc_a is None else proc_a, dtype=np.float64)
    proc_b = np.asarray(LINE_B_PROC if proc_b is None else proc_b, dtype=np.float64)
    yield_gran = np.asarray(
        LINE_A_YIELD_GRAN if yield_gran is None else yield_gran, dtype=np.float64)
    yield_strip = np.asarray(
        LINE_A_YIELD_STRIP if yield_strip is None else yield_strip, dtype=np.float64)
    demand_gran = np.asarray(
        LINE_B_DEMAND_GRAN if demand_gran is None else demand_gran, dtype=np.float64)
    demand_strip = np.asarray(
        LINE_B_DEMAND_STRIP if demand_strip is None else demand_strip, dtype=np.float64)

    num_jobs_a, num_machines_a = proc_a.shape
    num_jobs_b, num_machines_b = proc_b.shape

    order_a = _per_machine_orders(order_a, num_machines_a)
    order_b = _per_machine_orders(order_b, num_machines_b)

    # ---- Line A: non-permutation flowshop (machine-by-machine is correct) ----
    a_end = np.full((num_jobs_a, num_machines_a), -1.0)  # -1 marks "not scheduled"
    a_machine_end = np.zeros(num_machines_a)

    for m in range(num_machines_a):
        for j in order_a[m]:
            if m == 0:
                job_ready = 0.0
            else:
                job_ready = a_end[j, m - 1]
                if job_ready < 0:
                    raise ValueError(f"Job {j} not completed on machine {m-1} before scheduling on {m}")
            start = max(job_ready, a_machine_end[m])
            a_end[j, m] = start + proc_a[j, m]
            a_machine_end[m] = a_end[j, m]

    # Collect yield events sorted by time. Operations that were never
    # scheduled produce nothing, so they must not appear as events.
    yield_events = sorted(
        (a_end[j, m], yield_gran[j, m], yield_strip[j, m])
        for j in range(num_jobs_a)
        for m in range(num_machines_a)
        if a_end[j, m] >= 0
    )
    num_events = len(yield_events)

    # ---- Line B: greedy dispatch with material constraints ----
    b_end = np.full((num_jobs_b, num_machines_b), -1.0)
    b_machine_end = np.zeros(num_machines_b)
    next_pos = [0] * num_machines_b  # next position to schedule on each machine
    buf_g = 0.0
    buf_s = 0.0
    next_event = 0  # index of the first yield not yet added to the buffer
    remaining = sum(len(order_b[m]) for m in range(num_machines_b))

    while remaining > 0:
        # Gather the next operation of every machine that is ready to run.
        candidates = []
        for m in range(num_machines_b):
            pos = next_pos[m]
            if pos >= len(order_b[m]):
                continue
            j = order_b[m][pos]

            # Flowshop constraint: job must have finished previous machine
            if m == 0:
                job_ready = 0.0
            else:
                job_ready = b_end[j, m - 1]
                if job_ready < 0:
                    continue  # not yet done on previous machine
            candidates.append((max(job_ready, b_machine_end[m]), m, j))

        if not candidates:
            raise RuntimeError("No schedulable operations but not all done")

        # Earliest flowshop start first; ties go to the lower machine index.
        start, m, j = min(candidates)
        dg = demand_gran[j, m]
        ds = demand_strip[j, m]

        while True:
            # Bank every yield released up to the candidate start time.
            while next_event < num_events and yield_events[next_event][0] <= start:
                _, g, s = yield_events[next_event]
                buf_g += g
                buf_s += s
                next_event += 1
            if (buf_g >= dg and buf_s >= ds) or next_event >= num_events:
                break
            # Not enough material yet: wait for the next release.
            start = yield_events[next_event][0]

        buf_g -= dg
        buf_s -= ds
        b_end[j, m] = start + proc_b[j, m]
        b_machine_end[m] = b_end[j, m]
        next_pos[m] += 1
        remaining -= 1

    a_finish = a_end[:, -1].max() if a_end.size else 0.0
    b_finish = b_end[:, -1].max() if b_end.size else 0.0
    return {"makespan": max(a_finish, b_finish),
            "a_end": a_finish,
            "b_end": b_finish}
348
+
349
+
350
class CRMPEnv:
    """
    CRMP Environment for DRL - Sequence Building.

    The agent builds TWO sequences (Line A and Line B) step by step.
    Phase 1: Build Line A sequence (8 steps - pick one unscheduled job each step)
    Phase 2: Build Line B sequence (6 steps - pick one unscheduled job each step)

    Total: 14 steps per episode when every action is valid. An action that is
    not an available job is ignored: the step is counted but nothing is
    scheduled, so callers should sample actions through the masks.
    After both sequences are built, simulate_crmp evaluates the makespan.

    Action space:
    Phase 1 (Line A): pick from 8 jobs -> action 0..7
    Phase 2 (Line B): pick from 6 jobs -> action 0..5

    This is a PERMUTATION flowshop formulation (same as GA baseline).
    """

    # Terminal reward is (REWARD_OFFSET - makespan) / REWARD_SCALE:
    # 1307 -> +0.965, 1500 -> 0, 1800 -> -1.5
    REWARD_OFFSET = 1500.0
    REWARD_SCALE = 200.0

    def __init__(self, stochastic=False, noise_std=0.1,
                 base_proc_a=None, base_proc_b=None,
                 base_yield_g=None, base_yield_s=None,
                 base_demand_g=None, base_demand_s=None):
        """Create the environment and reset it once.

        Args:
            stochastic: when True, processing times are perturbed on reset.
            noise_std: standard deviation of the multiplicative noise.
            base_*: optional replacement data; each defaults to the matching
                module-level table. Values are converted to float arrays.
        """
        def pick(value, default):
            return default if value is None else np.asarray(value, dtype=np.float64)

        self.stochastic = stochastic
        self.noise_std = noise_std
        self.base_proc_a = pick(base_proc_a, LINE_A_PROC)
        self.base_proc_b = pick(base_proc_b, LINE_B_PROC)
        self.base_yield_g = pick(base_yield_g, LINE_A_YIELD_GRAN)
        self.base_yield_s = pick(base_yield_s, LINE_A_YIELD_STRIP)
        self.base_demand_g = pick(base_demand_g, LINE_B_DEMAND_GRAN)
        self.base_demand_s = pick(base_demand_s, LINE_B_DEMAND_STRIP)
        self.rng = np.random.default_rng()
        self.reset()

    @property
    def obs_dim(self):
        """Length of the observation vector."""
        return self._get_obs().shape[0]

    def reset(self, seed=None):
        """Start a new episode and return the first observation.

        Args:
            seed: if given, the random generator is re-created from it.
        """
        if seed is not None:
            self.rng = np.random.default_rng(seed)

        self.proc_a = self._sample(self.base_proc_a)
        self.proc_b = self._sample(self.base_proc_b)

        # Sequences being built
        self.seq_a = []
        self.seq_b = []

        # Which jobs are still available
        self.avail_a = set(range(NUM_JOBS_A))
        self.avail_b = set(range(NUM_JOBS_B))

        # Phase: 'A' = building Line A sequence, 'B' = building Line B sequence
        self.phase = 'A'
        self.done = False
        self.makespan = 0.0
        self.step_count = 0

        return self._get_obs()

    def _sample(self, base):
        """Return a copy of base, perturbed when the env is stochastic.

        The multiplicative factor is 1 + N(0, noise_std), clipped to
        [0.8, 1.2]; results are floored at 1.0.
        """
        if not self.stochastic:
            return base.copy()
        noise = 1.0 + self.rng.normal(0, self.noise_std, base.shape)
        return np.maximum(base * np.clip(noise, 0.8, 1.2), 1.0)

    def get_mask_a(self):
        """Mask for Line A action head. Valid only during phase A."""
        mask = np.zeros(NUM_JOBS_A + 1)
        if self.phase == 'A':
            for j in self.avail_a:
                mask[j] = 1.0
        else:
            mask[NUM_JOBS_A] = 1.0  # idle/no-op during phase B
        return mask

    def get_mask_b(self):
        """Mask for Line B action head. Valid only during phase B."""
        mask = np.zeros(NUM_JOBS_B + 1)
        if self.phase == 'B':
            for j in self.avail_b:
                mask[j] = 1.0
        else:
            mask[NUM_JOBS_B] = 1.0  # idle/no-op during phase A
        return mask

    def step(self, action_a, action_b):
        """Apply one decision and return (obs, reward, done, info).

        Only the action of the current phase is read. The reward is 0.0
        until the Line B sequence is complete; the completing step returns
        (REWARD_OFFSET - makespan) / REWARD_SCALE. Calling step() again
        after that returns reward 0.0 and leaves the state untouched.
        """
        if self.done:
            # Same keys as a regular step so callers can read them blindly.
            info = {"makespan": self.makespan,
                    "phase": self.phase, "steps": self.step_count}
            return self._get_obs(), 0.0, True, info

        self.step_count += 1

        if self.phase == 'A':
            # Line A decision
            if action_a in self.avail_a:
                self.seq_a.append(action_a)
                self.avail_a.remove(action_a)

            if len(self.seq_a) == NUM_JOBS_A:
                self.phase = 'B'

        elif self.phase == 'B':
            # Line B decision
            if action_b in self.avail_b:
                self.seq_b.append(action_b)
                self.avail_b.remove(action_b)

            if len(self.seq_b) == NUM_JOBS_B:
                # Episode complete - evaluate
                self.done = True
                result = simulate_crmp(self.seq_a, self.seq_b,
                                       self.proc_a, self.proc_b,
                                       self.base_yield_g, self.base_yield_s,
                                       self.base_demand_g, self.base_demand_s)
                self.makespan = result["makespan"]

        # Reward: only at end, higher is better
        if self.done:
            reward = (self.REWARD_OFFSET - self.makespan) / self.REWARD_SCALE
        else:
            reward = 0.0

        info = {"makespan": self.makespan if self.done else None,
                "phase": self.phase, "steps": self.step_count}
        return self._get_obs(), reward, self.done, info

    def _get_obs(self):
        """Build the flat observation vector for the current state."""
        obs = [
            # Phase indicator (one-hot: A=1,0 B=0,1)
            1.0 if self.phase == 'A' else 0.0,
            1.0 if self.phase == 'B' else 0.0,
            # Progress
            len(self.seq_a) / NUM_JOBS_A,
            len(self.seq_b) / NUM_JOBS_B,
        ]

        # Line A job availability (8 dims), then Line B (6 dims)
        obs.extend(1.0 if j in self.avail_a else 0.0 for j in range(NUM_JOBS_A))
        obs.extend(1.0 if j in self.avail_b else 0.0 for j in range(NUM_JOBS_B))

        # Total processing time per job (normalized): Line A, then Line B
        obs.extend(self.proc_a[j].sum() / 1000.0 for j in range(NUM_JOBS_A))
        obs.extend(self.proc_b[j].sum() / 1000.0 for j in range(NUM_JOBS_B))

        # Line B total material demand per job (granulates, then strips)
        obs.extend(self.base_demand_g[j].sum() / 500.0 for j in range(NUM_JOBS_B))
        obs.extend(self.base_demand_s[j].sum() / 500.0 for j in range(NUM_JOBS_B))

        # Summed processing time of the Line A jobs sequenced so far
        # (0.0 while the sequence is empty)
        obs.append(sum(self.proc_a[j].sum() for j in self.seq_a) / 2000.0)

        # Total processing time of the most recently sequenced job per line
        if self.seq_a:
            obs.append(self.proc_a[self.seq_a[-1]].sum() / 1000.0)
        else:
            obs.append(0.0)

        if self.seq_b:
            obs.append(self.proc_b[self.seq_b[-1]].sum() / 1000.0)
        else:
            obs.append(0.0)

        return np.array(obs, dtype=np.float64)
536
+
537
+
538
class CRMPEnvNonPerm:
    """
    CRMP Environment for Non-Permutation DRL.

    Non-permutation: each machine can have a DIFFERENT job order.
    The agent makes per-machine dispatching decisions.

    Phase A: For each machine m=0..5, pick the order of 8 jobs (8 steps per machine, 48 total)
    Phase B: For each machine m=0..2, pick the order of 6 jobs (6 steps per machine, 18 total)
    Total: 66 steps per episode when every action is valid. An action that is
    not an available job is ignored (the step is counted, nothing is placed).

    When the last order is complete, simulate_nonperm evaluates the makespan.
    """

    # Terminal reward is (REWARD_OFFSET - makespan) / REWARD_SCALE.
    REWARD_OFFSET = 1500.0
    REWARD_SCALE = 200.0

    def __init__(self, stochastic=False, noise_std=0.1,
                 base_proc_a=None, base_proc_b=None,
                 base_yield_g=None, base_yield_s=None,
                 base_demand_g=None, base_demand_s=None):
        """Create the environment and reset it once.

        Args:
            stochastic: when True, processing times are perturbed on reset.
            noise_std: standard deviation of the multiplicative noise.
            base_*: optional replacement data; each defaults to the matching
                module-level table and must keep that table's shape, because
                the episode layout uses the NUM_* constants.
        """
        def pick(value, default):
            return default if value is None else np.asarray(value, dtype=np.float64)

        self.stochastic = stochastic
        self.noise_std = noise_std
        self.base_proc_a = pick(base_proc_a, LINE_A_PROC)
        self.base_proc_b = pick(base_proc_b, LINE_B_PROC)
        self.base_yield_g = pick(base_yield_g, LINE_A_YIELD_GRAN)
        self.base_yield_s = pick(base_yield_s, LINE_A_YIELD_STRIP)
        self.base_demand_g = pick(base_demand_g, LINE_B_DEMAND_GRAN)
        self.base_demand_s = pick(base_demand_s, LINE_B_DEMAND_STRIP)
        self.rng = np.random.default_rng()
        self.reset()

    @property
    def obs_dim(self):
        """Length of the observation vector."""
        return self._get_obs().shape[0]

    def reset(self, seed=None):
        """Start a new episode and return the first observation.

        Args:
            seed: if given, the random generator is re-created from it.
        """
        if seed is not None:
            self.rng = np.random.default_rng(seed)

        self.proc_a = self._sample(self.base_proc_a)
        self.proc_b = self._sample(self.base_proc_b)

        # Per-machine job orders
        self.order_a = {m: [] for m in range(NUM_MACHINES_A)}
        self.order_b = {m: [] for m in range(NUM_MACHINES_B)}

        # Current machine being scheduled
        self.current_line = 'A'  # 'A' or 'B'
        self.current_machine = 0
        self.avail_jobs = set(range(NUM_JOBS_A))

        self.done = False
        self.makespan = 0.0
        self.step_count = 0

        return self._get_obs()

    def _sample(self, base):
        """Return a copy of base, perturbed when the env is stochastic.

        The multiplicative factor is 1 + N(0, noise_std), clipped to
        [0.8, 1.2]; results are floored at 1.0.
        """
        if not self.stochastic:
            return base.copy()
        noise = 1.0 + self.rng.normal(0, self.noise_std, base.shape)
        return np.maximum(base * np.clip(noise, 0.8, 1.2), 1.0)

    def get_mask_a(self):
        """Mask for the Line A head; the extra last slot is the no-op used on Line B."""
        mask = np.zeros(NUM_JOBS_A + 1)
        if self.current_line == 'A':
            for j in self.avail_jobs:
                mask[j] = 1.0
        else:
            mask[NUM_JOBS_A] = 1.0
        return mask

    def get_mask_b(self):
        """Mask for the Line B head; the extra last slot is the no-op used on Line A."""
        mask = np.zeros(NUM_JOBS_B + 1)
        if self.current_line == 'B':
            for j in self.avail_jobs:
                mask[j] = 1.0
        else:
            mask[NUM_JOBS_B] = 1.0
        return mask

    def step(self, action_a, action_b):
        """Apply one decision and return (obs, reward, done, info).

        Only the action of the current line is read. The reward is 0.0 until
        the last Line B machine order is complete; the completing step
        returns (REWARD_OFFSET - makespan) / REWARD_SCALE. Calling step()
        again after that returns reward 0.0 and leaves the state untouched.
        """
        if self.done:
            # Same keys as a regular step so callers can read them blindly.
            return self._get_obs(), 0.0, True, {"makespan": self.makespan,
                                                "steps": self.step_count}

        self.step_count += 1

        if self.current_line == 'A':
            if action_a in self.avail_jobs:
                self.order_a[self.current_machine].append(action_a)
                self.avail_jobs.remove(action_a)
                if not self.avail_jobs:
                    # Move to next machine or switch to Line B
                    self.current_machine += 1
                    if self.current_machine >= NUM_MACHINES_A:
                        self.current_line = 'B'
                        self.current_machine = 0
                        self.avail_jobs = set(range(NUM_JOBS_B))
                    else:
                        self.avail_jobs = set(range(NUM_JOBS_A))
        elif self.current_line == 'B':
            if action_b in self.avail_jobs:
                self.order_b[self.current_machine].append(action_b)
                self.avail_jobs.remove(action_b)
                if not self.avail_jobs:
                    self.current_machine += 1
                    if self.current_machine >= NUM_MACHINES_B:
                        self.done = True
                        result = simulate_nonperm(self.order_a, self.order_b,
                                                  self.proc_a, self.proc_b,
                                                  self.base_yield_g, self.base_yield_s,
                                                  self.base_demand_g, self.base_demand_s)
                        self.makespan = result["makespan"]
                    else:
                        self.avail_jobs = set(range(NUM_JOBS_B))

        if self.done:
            reward = (self.REWARD_OFFSET - self.makespan) / self.REWARD_SCALE
        else:
            reward = 0.0

        info = {"makespan": self.makespan if self.done else None,
                "steps": self.step_count}
        return self._get_obs(), reward, self.done, info

    def _get_obs(self):
        """Build the flat observation vector for the current state."""
        on_line_a = self.current_line == 'A'
        on_line_b = self.current_line == 'B'

        obs = [
            # Line indicator
            1.0 if on_line_a else 0.0,
            1.0 if on_line_b else 0.0,
            # Current machine (normalized)
            self.current_machine / max(NUM_MACHINES_A, NUM_MACHINES_B),
        ]

        # Progress: decisions placed so far over decisions in an episode.
        # Counting placed entries keeps the value at exactly 1.0 in the
        # terminal state, where current_machine has already moved past the
        # last machine while avail_jobs is still empty.
        total_steps = NUM_JOBS_A * NUM_MACHINES_A + NUM_JOBS_B * NUM_MACHINES_B
        done_steps = (sum(len(jobs) for jobs in self.order_a.values())
                      + sum(len(jobs) for jobs in self.order_b.values()))
        obs.append(done_steps / total_steps)

        # Available jobs (all zeros for the line not being scheduled)
        obs.extend(1.0 if on_line_a and j in self.avail_jobs else 0.0
                   for j in range(NUM_JOBS_A))
        obs.extend(1.0 if on_line_b and j in self.avail_jobs else 0.0
                   for j in range(NUM_JOBS_B))

        # Processing times
        obs.extend(self.proc_a[j].sum() / 1000.0 for j in range(NUM_JOBS_A))
        obs.extend(self.proc_b[j].sum() / 1000.0 for j in range(NUM_JOBS_B))

        # Current machine processing times
        if on_line_a and self.current_machine < NUM_MACHINES_A:
            obs.extend(self.proc_a[j, self.current_machine] / 200.0
                       for j in range(NUM_JOBS_A))
        else:
            obs.extend(0.0 for _ in range(NUM_JOBS_A))

        if on_line_b and self.current_machine < NUM_MACHINES_B:
            obs.extend(self.proc_b[j, self.current_machine] / 200.0
                       for j in range(NUM_JOBS_B))
        else:
            obs.extend(0.0 for _ in range(NUM_JOBS_B))

        return np.array(obs, dtype=np.float64)
702
+
703
+
704
if __name__ == "__main__":
    # Smoke test: material balance, reference sequences, the sequence-building
    # environment, and a random search over non-permutation schedules.
    import time

    print("CRMP Environment - Formal Paper Data (Yin et al. 2021)")
    print("=" * 60)

    ok = verify_data()
    print(f"Material balance feasible: {ok}")
    print()

    print("Paper benchmarks (Real dataset, Table 5):")
    print(" FCFS: 1457 min")
    print(" Campbell-Dudek: 1340 best, 1361 avg")
    print(" GA: 1307 best, 1315 avg")
    print()

    # FCFS
    ms = evaluate_sequence(list(range(NUM_JOBS_A)), list(range(NUM_JOBS_B)))
    print(f"Our FCFS (permutation): {ms:.0f} min")

    # Paper's GA best sequence
    ga_a = [5, 0, 1, 6, 7, 3, 4, 2]
    ga_b = [0, 2, 5, 4, 3, 1]
    ms_ga = evaluate_sequence(ga_a, ga_b)
    print(f"Paper GA best (permutation): {ms_ga:.0f} min")

    # Non-permutation with same sequence (should match permutation)
    ms_np = simulate_nonperm(ga_a, ga_b)["makespan"]
    print(f"Non-perm with GA seq (same order all machines): {ms_np:.0f} min")

    # Test CRMPEnv
    print("\nTesting CRMPEnv (sequence builder)...")
    env = CRMPEnv(stochastic=False)
    obs = env.reset()
    print(f" Obs dim: {len(obs)}")
    # Feed GA sequence
    for j in ga_a:
        obs, r, done, info = env.step(j, NUM_JOBS_B)  # idle on B during phase A
    for j in ga_b:
        obs, r, done, info = env.step(NUM_JOBS_A, j)  # idle on A during phase B
    print(f" GA sequence makespan via env: {info['makespan']:.0f}")
    print(f" Steps: {info['steps']}, Done: {done}")

    # Quick non-perm search
    print("\nNon-permutation random search (50k)...")
    best_np = float('inf')
    best_orders = None
    rng = np.random.default_rng(42)
    t0 = time.time()
    for i in range(50000):
        oa = {m: rng.permutation(NUM_JOBS_A).tolist() for m in range(NUM_MACHINES_A)}
        ob = {m: rng.permutation(NUM_JOBS_B).tolist() for m in range(NUM_MACHINES_B)}
        try:
            r = simulate_nonperm(oa, ob)
        except (ValueError, RuntimeError):
            # Only the simulator's own "cannot schedule" errors are skipped;
            # anything else (including Ctrl-C) must surface.
            continue
        if r["makespan"] < best_np:
            best_np = r["makespan"]
            best_orders = (oa, ob)
            if i % 5000 == 0 or best_np < 1307:
                print(f" [{i+1:6d}] Best non-perm: {best_np:.0f}")
    elapsed = time.time() - t0
    print(f" Non-perm random best: {best_np:.0f} ({elapsed:.1f}s)")
    if best_np < 1307:
        print(f" *** NON-PERM BEATS PERMUTATION GA by {1307-best_np:.0f} min ***")