younadi commited on
Commit
8c274bd
·
1 Parent(s): 002ca5d

Updated create_dataset.py in d_18_02_2026_14_43_50_historic_netball/ and generated the corresponding dataset

Browse files
datasets/d_18_02_2026_14_43_50_historic_netball/create_dataset.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import itertools
import json
import math
import os
import time

import numpy as np
import tqdm
7
+
8
+
9
def generate_random_pfsp_instance(nb_jobs, nb_machines, time_min, time_max, seed=97):
    # TODO: add the possibility to simply load an instance from a file
    """
    Generates a random instance of the Permutation Flow Shop Problem (PFSP).

    Parameters:
    - nb_jobs: Number of jobs (n).
    - nb_machines: Number of machines (m).
    - time_min: Minimum processing time for any job on any machine.
    - time_max: Maximum processing time for any job on any machine.
    - seed: Random seed for reproducibility; pass None to leave the global
      numpy RNG state untouched.
    Returns:
    - A 2D numpy array of shape (nb_jobs x nb_machines) where each entry is a
      random processing time between time_min and time_max (inclusive).
    """
    if seed is not None:
        np.random.seed(seed)
    # randint's upper bound is exclusive, hence time_max + 1
    return np.random.randint(time_min, time_max + 1, size=(nb_jobs, nb_machines))
23
+
24
+
25
def fit_palmer(pfsp_instance: np.ndarray):
    """
    Implements Palmer's heuristic (slope index) for the flowshop scheduling
    problem. Returns a schedule and its corresponding makespan.
    For now this wraps an old Palmer implementation; it should eventually be
    refactored to be cleaner and more efficient.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: np.int32 array of job indices representing the order of jobs (e.g., [0, 2, 1]).
        - makespan: np.int32 total completion time for the returned schedule.
    """

    # =====================================================================================
    # Legacy Palmer implementation, wrapped here until it gets refactored.
    class Palmer:
        def __init__(self, jobs_list: list):
            self.jobs_list = jobs_list
            self.nb_jobs = len(jobs_list)
            self.nb_machines = len(jobs_list[0])
            self.seq_star = None        # best sequence found by optim()
            self.make_span_star = None  # makespan of seq_star

        # Completion times of `job` on every machine, given the completion
        # times of the previously scheduled job (classic flowshop recursion).
        def cumulate(self, job: list, previous_cumul=None):
            res = [0] * len(job)
            if previous_cumul is None:
                res[0] = job[0]
                for i in range(1, len(job)):
                    res[i] = res[i - 1] + job[i]
            else:
                res[0] = previous_cumul[0] + job[0]
                for i in range(1, len(job)):
                    res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
            return res

        # Completion times after running a whole sequence of jobs (quadratic if
        # called per candidate; the heuristic only calls it once at the end).
        def cumulate_seq(self, seq: list):
            cumulated = None
            for i in seq:
                cumulated = self.cumulate(self.jobs_list[i], cumulated)
            return cumulated

        # Compute Palmer slope indices and order jobs by decreasing index.
        def optim(self, debug=False):
            jobs_weights = []
            for i, job in enumerate(self.jobs_list):
                weight = 0
                for j in range(self.nb_machines):
                    if debug:
                        print(
                            f">job {i} mach {j} first term: {(2*(j+1) - 1) - self.nb_machines}"
                        )
                        print(f">job {i} mach {j} second term: {job[j]}")
                        print(
                            "------------------------------------------------------------------"
                        )
                    # Palmer slope index: machines later in the line get
                    # positive coefficients, earlier ones negative.
                    weight += ((2 * (j + 1) - 1) - self.nb_machines) * job[j]
                if debug:
                    print(f"===>> job {i} weight: {weight}")
                jobs_weights.append((weight, i))

            # Descending tuple sort; ties on weight break toward the higher job index.
            self.seq_star = [tu[1] for tu in sorted(jobs_weights, reverse=True)]
            self.make_span_star = self.cumulate_seq(self.seq_star)[-1]

            return (self.seq_star, self.make_span_star)

    # =====================================================================================

    # Interfacing with the underlying old palmer code
    jobs_list = pfsp_instance.tolist()
    palmer_schedule, palmer_makespan = Palmer(jobs_list).optim()

    # Returning the schedule and makespan as numpy values of type int32
    return np.array(palmer_schedule, dtype=np.int32), np.int32(palmer_makespan)
102
+
103
+
104
def fit_cds(pfsp_instance: np.ndarray):
    """
    Implements the CDS (Campbell, Dudek and Smith) heuristic for the flowshop
    scheduling problem. Returns a schedule and its corresponding makespan.
    For now this wraps an old CDS implementation; it should eventually be
    refactored to be cleaner and more efficient.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: np.int32 array of job indices representing the order of jobs (e.g., [0, 2, 1]).
        - makespan: np.int32 total completion time for the returned schedule.
    """

    # =====================================================================================
    # Completion times of `job` on every machine, given the completion times
    # of the previously scheduled job (classic flowshop recursion).
    def cumulate(job, previous_cumul=None):
        res = [0] * len(job)
        if previous_cumul is None:
            res[0] = job[0]
            for i in range(1, len(job)):
                res[i] = res[i - 1] + job[i]
        else:
            res[0] = previous_cumul[0] + job[0]
            for i in range(1, len(job)):
                res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
        return res

    # Completion times after running a whole sequence of jobs.
    def cumulate_seq(seq, jobs_list):
        cumulated = None
        for i in seq:
            cumulated = cumulate(jobs_list[i], cumulated)
        return cumulated

    # Makespan of a sequence: completion time on the last machine.
    def makespan(sequence, job_list):
        return cumulate_seq(sequence, job_list)[-1]

    # Johnson's algorithm for the two-machine flow shop.
    def johnson_algorithm(matrix):
        machines = [[], []]

        # Jobs faster on machine 1 are scheduled first; the rest last.
        for i in range(matrix.shape[0]):
            if matrix[i][0] < matrix[i][1]:  # if time(m1) < time(m2)
                machines[0].append((matrix[i][0], i))
            else:
                machines[1].append((matrix[i][1], i))

        # Ascending sort for the first machine, descending for the second
        machines[0] = sorted(machines[0], key=lambda x: x[0])
        machines[1] = sorted(machines[1], key=lambda x: x[0], reverse=True)

        # Merging the two sorted lists yields the optimal 2-machine sequence
        merged = machines[0] + machines[1]
        return [index for _, index in merged]

    # Applies Johnson's algorithm on the 2-machine surrogate and evaluates the
    # resulting sequence on the *full* instance.
    def johnson(job_matrix, data_matrix):
        sequence = johnson_algorithm(job_matrix)
        return sequence, makespan(sequence, data_matrix)

    # CDS heuristic: build m-1 two-machine surrogate problems and keep the best.
    def cds_heuristic(matrix):
        m = matrix.shape[1]
        best_makespan = float("inf")
        best_sequences = []

        for i in range(1, m):
            # Surrogate machine 1 = first i machines, surrogate machine 2 = last i.
            machine_subset_1 = matrix[:, :i].sum(axis=1)
            machine_subset_2 = matrix[:, -i:].sum(axis=1)
            job_matrix = np.column_stack((machine_subset_1, machine_subset_2))

            # Apply Johnson's algorithm to the surrogate and evaluate the makespan
            sequence, makespan_value = johnson(job_matrix, matrix)

            # Track the best makespan and every sequence achieving it
            if makespan_value < best_makespan:
                best_makespan = makespan_value
                best_sequences = [sequence]
            elif makespan_value == best_makespan:
                best_sequences.append(sequence)

        return best_sequences[0], best_makespan

    # =====================================================================================

    # Interfacing with the underlying old cds code
    cds_schedule, cds_makespan = cds_heuristic(pfsp_instance)

    # Returning the schedule and makespan as numpy values of type int32
    return np.array(cds_schedule, dtype=np.int32), np.int32(cds_makespan)
207
+
208
+
209
def fit_neh(pfsp_instance: np.ndarray):
    """
    Implements the NEH (Nawaz, Enscore, Ham) heuristic for the flowshop
    scheduling problem. Returns a schedule and its corresponding makespan.
    For now this wraps an old NEH implementation; it should eventually be
    refactored to be cleaner and more efficient.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: np.int32 array of job indices representing the order of jobs (e.g., [0, 2, 1]).
        - makespan: np.int32 total completion time for the returned schedule.
    Raises:
    - ValueError: if the instance has fewer than 2 jobs.
    """

    # =====================================================================================
    # Legacy NEH implementation, wrapped here until it gets refactored.
    class Inst:
        # Lightweight container expected by the legacy NEH code.
        # Note: `matrix` is machine-major, i.e. matrix[machine][job].
        def __init__(
            self,
            jobs: int,
            machines: int,
            seed: int,
            ub: int,
            lb: int,
            matrix: list[list[int]],
        ):
            self.jobs = jobs
            self.machines = machines
            self.seed = seed  # kept for legacy interface compatibility; unused here
            self.ub = ub      # upper-bound placeholder; unused here
            self.lb = lb      # lower-bound placeholder; unused here
            self.matrix = matrix

        def __repr__(self) -> str:
            return f"Inst(jobs={self.jobs}, machines={self.machines}, seed={self.seed}, ub={self.ub}, lb={self.lb}, matrix={self.matrix})"

    class NEH:
        def __init__(self, instance: Inst, debug: bool = False):
            self.instance = instance
            self.debug = debug

        # Total processing time of `job` across all machines.
        def calculate_sj(self, job: int) -> int:
            sj = 0
            for machine in range(self.instance.machines):
                sj += self.instance.matrix[machine][job]
            return sj

        # NOTE(review): canonical NEH sorts jobs by *decreasing* total time;
        # this legacy code sorts ascending by default. Kept as-is so previously
        # generated datasets remain reproducible — confirm before changing.
        def sort_jobs(self, reverse: bool = False) -> list[int]:
            return sorted(
                range(self.instance.jobs),
                key=lambda job: self.calculate_sj(job),
                reverse=reverse,
            )

        # Simulates the flowshop for a (partial) job order and returns the
        # per-machine completion times; the last entry is the makespan.
        def emulate(self, jobs: list[int]) -> list[int]:
            machines_exec = [0] * self.instance.machines
            for job in jobs:
                for current_machine in range(self.instance.machines):
                    # Add the job's execution time on the current machine
                    machines_exec[current_machine] += self.instance.matrix[
                        current_machine
                    ][job]

                    # Downstream machines cannot start before this one finishes
                    for machine in range(current_machine + 1, self.instance.machines):
                        machines_exec[machine] = max(
                            machines_exec[current_machine], machines_exec[machine]
                        )

            return machines_exec

        def calculate_cmax(self, jobs: list[int]) -> int:
            return self.emulate(jobs)[-1]

        # Evaluates candidate orders and keeps the one with the smallest Cmax
        # (first one wins on ties).
        def get_best_order(self, orders: list[list[int]]) -> tuple[int, list[int]]:
            min_cmax = float("inf")
            min_order = None
            for order in orders:
                cmax = self.calculate_cmax(order)
                if cmax < min_cmax:
                    min_cmax = cmax
                    min_order = order

            return min_cmax, min_order

        # Tries inserting `job` at every position of `order`; keeps the best.
        def get_best_position(
            self, order: list[int], job: int
        ) -> tuple[int, list[int]]:
            possible_orders = [
                order[:pos] + [job] + order[pos:] for pos in range(len(order) + 1)
            ]
            return self.get_best_order(possible_orders)

        def __call__(self) -> tuple[int, list[int]]:
            if self.instance.jobs < 2:
                # Message fixed: the check is `< 2`, so two jobs are allowed.
                raise ValueError("Number of jobs must be at least 2")

            # Seed the partial order with the best arrangement of the first two jobs
            sorted_jobs = self.sort_jobs()
            current_cmax, current_order = self.get_best_order(
                [sorted_jobs[:2], sorted_jobs[:2][::-1]]
            )

            if self.debug:
                print(current_cmax, current_order)

            if self.instance.jobs == 2:
                return current_cmax, current_order

            # Greedily insert each remaining job at its best position
            for job in sorted_jobs[2:]:
                current_cmax, current_order = self.get_best_position(current_order, job)
                if self.debug:
                    print(current_cmax, current_order)

            return current_cmax, current_order

    # =====================================================================================

    # Interfacing with the underlying old neh code, which expects a
    # machine-major matrix — hence the transpose.
    neh_instance = Inst(
        pfsp_instance.shape[0],
        pfsp_instance.shape[1],
        seed=0,
        ub=0,
        lb=0,
        matrix=pfsp_instance.T.tolist(),
    )
    neh_makespan, neh_schedule = NEH(neh_instance)()

    # Returning the schedule and makespan as numpy values of type int32
    return np.array(neh_schedule, dtype=np.int32), np.int32(neh_makespan)
340
+
341
+
342
+ def evaluate_makespan(pfsp_instance, schedule):
343
+ """
344
+ Evaluates the makespan (completion time) of a given schedule for a given pfsp_instance.
345
+ Parameters:
346
+ - pfsp_instance: A list of lists, where pfsp_instance[i][j] is the processing time of job i on machine j.
347
+ - schedule: A list/tuple indicating the order of jobs (e.g., [0, 2, 1]).
348
+ Returns:
349
+ - The makespan (total completion time) for the given schedule.
350
+ """
351
+
352
+ def cumulate(job: list, previous_cumul=None):
353
+ # Calculate the cumulative completion times for a job
354
+
355
+ res = [0] * len(job)
356
+ if previous_cumul == None:
357
+ res[0] = job[0]
358
+ for i in range(1, len(job)):
359
+ res[i] = res[i - 1] + job[i]
360
+ else:
361
+ res[0] = previous_cumul[0] + job[0]
362
+ for i in range(1, len(job)):
363
+ res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
364
+ return res
365
+
366
+ def cumulate_seq(pfsp_instance: list, schedule: list):
367
+ # Calculates the cumulative time for a sequence of jobs on machines.
368
+
369
+ cumulated = None
370
+ for i in schedule:
371
+ cumulated = cumulate(pfsp_instance[i], cumulated)
372
+ return cumulated
373
+
374
+ cumulative = cumulate_seq(pfsp_instance, schedule)
375
+ return cumulative[-1]
376
+
377
+
378
def create_dataset(
    pfsp_instance,
    nb_base_samples,
    duplication_factor=0.0,
    init_type="random",
    data_folder_location="./",
    data_folder_name=None,
    seed=97
):
    """
    Generates a dataset of (schedule, makespan) pairs for a PFSP instance and
    persists it to disk (memmapped binaries + the instance + JSON metadata).

    Parameters:
    - pfsp_instance: 2D numpy array (nb_jobs x nb_machines) of processing times.
    - nb_base_samples: Number of base samples to generate; ignored and replaced
      by nb_jobs! when init_type == "exhaustive".
    - duplication_factor: Fraction of extra samples obtained by duplicating
      randomly chosen base samples (0.1 => 10% more rows).
    - init_type: One of "exhaustive", "cds", "palmer", "neh", "heuristics", "random".
    - data_folder_location: Parent directory for the dataset folder.
    - data_folder_name: Optional folder name (prefixed with "ftd_"); a
      timestamped name is generated when None.
    - seed: Random seed for reproducibility (None leaves the RNG untouched).
    Returns:
    - (schedules, makespans): the two memmapped numpy arrays (int32).
    Raises:
    - ValueError: for an unknown init_type.
    """
    if init_type == "exhaustive":
        # Exhaustive enumeration fixes the number of base samples to nb_jobs!
        nb_base_samples = math.factorial(pfsp_instance.shape[0])

    nb_samples = nb_base_samples + int(nb_base_samples * duplication_factor)

    if seed is not None:
        np.random.seed(seed)

    def perturb_schedule(schedule):
        # Copy explicitly: slicing a numpy array (schedule[:]) returns a *view*,
        # so swapping in a view would silently mutate the base schedule and make
        # perturbations compound across calls instead of being single swaps.
        perturbed_schedule = np.array(schedule)
        i, j = np.random.choice(perturbed_schedule.shape[0], size=2, replace=False)
        perturbed_schedule[[i, j]] = perturbed_schedule[[j, i]]
        return perturbed_schedule, evaluate_makespan(pfsp_instance, perturbed_schedule)

    def fill_from_base(base_schedule, base_makespan, start, count, desc):
        # Writes the base sample at index `start`, then count-1 single-swap
        # perturbations of it. No-op when count <= 0 (avoids the unbound-index
        # bug of the previous bookkeeping for tiny nb_base_samples).
        if count <= 0:
            return
        schedules[start] = base_schedule
        makespans[start] = base_makespan
        for i in tqdm.tqdm(range(start + 1, start + count), desc=desc):
            schedules[i], makespans[i] = perturb_schedule(base_schedule)

    # Create the folder if it doesn't exist
    if data_folder_name is None:
        data_folder_name = f"ftd_{time.strftime('%d_%m_%Y_%H_%M_%S')}"
    else:
        data_folder_name = f"ftd_{data_folder_name}"
    data_path = os.path.join(data_folder_location, data_folder_name)
    os.makedirs(data_path, exist_ok=True)

    # Memmapped outputs so very large datasets do not have to fit in RAM
    nb_jobs = pfsp_instance.shape[0]
    schedules = np.memmap(os.path.join(data_path, "schedules.bin"), dtype=np.int32, mode='w+', shape=(nb_samples, nb_jobs))
    makespans = np.memmap(os.path.join(data_path, "makespans.bin"), dtype=np.int32, mode='w+', shape=(nb_samples,))

    # Save the pfsp instance as a numpy file
    np.save(os.path.join(data_path, "pfsp_instance.npy"), pfsp_instance)

    # Create a metadata dictionary and save it as a json file
    metadata_dict = {
        "nb_base_samples": nb_base_samples,
        "duplication_factor": duplication_factor,
        "nb_samples": nb_samples,
        "nb_jobs": nb_jobs,
        "nb_machines": pfsp_instance.shape[1],
        "init_type": init_type,
        "data_path": data_path,
        "seed": seed,
        "date_time": time.strftime('%d_%m_%Y_%H_%M_%S')
    }

    with open(os.path.join(data_path, "metadata.json"), "w") as f:
        json.dump(metadata_dict, f, indent=4)

    if init_type == "exhaustive":
        # Enumerate every permutation of the jobs
        for i, schedule in tqdm.tqdm(enumerate(itertools.permutations(range(nb_jobs))), total=math.factorial(nb_jobs)):
            schedules[i] = schedule
            makespans[i] = evaluate_makespan(pfsp_instance, schedule)

    elif init_type == "cds":
        cds_schedule, cds_makespan = fit_cds(pfsp_instance)
        fill_from_base(cds_schedule, cds_makespan, 0, nb_base_samples, "Generating CDS samples")

    elif init_type == "palmer":
        palmer_schedule, palmer_makespan = fit_palmer(pfsp_instance)
        fill_from_base(palmer_schedule, palmer_makespan, 0, nb_base_samples, "Generating Palmer samples")

    elif init_type == "neh":
        neh_schedule, neh_makespan = fit_neh(pfsp_instance)
        fill_from_base(neh_schedule, neh_makespan, 0, nb_base_samples, "Generating NEH samples")

    elif init_type == "heuristics":
        # Split the base samples roughly evenly across the three heuristics;
        # explicit offsets replace the previous error-prone `i += 1` chain.
        cds_size = nb_base_samples // 3
        palmer_size = nb_base_samples // 3
        neh_size = nb_base_samples - cds_size - palmer_size
        cds_schedule, cds_makespan = fit_cds(pfsp_instance)
        fill_from_base(cds_schedule, cds_makespan, 0, cds_size, "Generating CDS heuristic samples")
        palmer_schedule, palmer_makespan = fit_palmer(pfsp_instance)
        fill_from_base(palmer_schedule, palmer_makespan, cds_size, palmer_size, "Generating Palmer heuristic samples")
        neh_schedule, neh_makespan = fit_neh(pfsp_instance)
        fill_from_base(neh_schedule, neh_makespan, cds_size + palmer_size, neh_size, "Generating NEH heuristic samples")

    elif init_type == "random":
        for i in tqdm.tqdm(range(nb_base_samples), desc="Generating Random samples"):
            schedule = np.random.permutation(pfsp_instance.shape[0])
            schedules[i] = schedule
            makespans[i] = evaluate_makespan(pfsp_instance, schedule)

    else:
        raise ValueError("Invalid initialization type")

    # Add the duplicated samples: sample with repetition from the base samples
    duplicated_schedules_idx = np.random.choice(nb_base_samples, size=nb_samples - nb_base_samples, replace=True)
    schedules[nb_base_samples:nb_samples] = schedules[duplicated_schedules_idx]
    makespans[nb_base_samples:nb_samples] = makespans[duplicated_schedules_idx]

    # Flush to disk and return the memmaps
    schedules.flush()
    makespans.flush()
    return schedules, makespans
494
+
495
+
496
+ if __name__ == "__main__":
497
+ # Parse arguments and call create_dataset with the appropriate parameters
498
+ import argparse
499
+ import time
500
+ parser = argparse.ArgumentParser(description="Create a dataset for the flowshop scheduling problem")
501
+ parser.add_argument("--nb_jobs", type=int, default=4, help="Number of jobs")
502
+ parser.add_argument("--nb_machines", type=int, default=2, help="Number of machines")
503
+ parser.add_argument("--time_min", type=int, default=1, help="Minimum processing time")
504
+ parser.add_argument("--time_max", type=int, default=100, help="Maximum processing time")
505
+ parser.add_argument("--nb_base_samples", type=int, default=1000, help="Number of base samples to generate before duplication")
506
+ parser.add_argument("--duplication_factor", type=float, default=0.1, help="Factor by which to duplicate the base samples (e.g., 0.5 means 50% more samples will be created by duplicating the base samples)")
507
+ parser.add_argument("--init_type", type=str, default="exhaustive", choices=["exhaustive", "cds", "palmer", "neh", "heuristics", "random"], help="Initialization type for the base samples")
508
+ parser.add_argument("--data_folder_location", type=str, default="./", help="Location where the dataset folder will be created")
509
+ parser.add_argument("--data_folder_name", type=str, default=None, help="Name of the dataset folder (if not provided, a name with the current date and time will be generated)")
510
+ parser.add_argument("--seed", type=int, default=97, help="Random seed for reproducibility (set to None for no seeding)")
511
+ args = parser.parse_args()
512
+ pfsp_instance = generate_random_pfsp_instance(args.nb_jobs, args.nb_machines, args.time_min, args.time_max, seed=args.seed)
513
+ schedules, makespans = create_dataset(
514
+ pfsp_instance=pfsp_instance,
515
+ nb_base_samples=args.nb_base_samples,
516
+ duplication_factor=args.duplication_factor,
517
+ init_type=args.init_type,
518
+ data_folder_location=args.data_folder_location,
519
+ data_folder_name=args.data_folder_name,
520
+ seed=args.seed
521
+ )
datasets/d_18_02_2026_14_43_50_historic_netball/d_desc.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dataset ID
2
+
3
+ d_18_02_2026_14_43_50_historic_netball
4
+
5
+ # Dataset Description
6
+
7
+ This dataset focuses on ...
8
+
9
+ # Dataset Tags
10
+
11
+ - d:sports
12
+ - ...
datasets/d_18_02_2026_14_43_50_historic_netball/ftd_20_02_2026_00_58_13/makespans.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb6c637a3c570e3bf9a72d5e1b3b53ed61c156f23a3d66fe62e67c52fd44ea9
3
+ size 22176
datasets/d_18_02_2026_14_43_50_historic_netball/ftd_20_02_2026_00_58_13/metadata.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nb_base_samples": 5040,
3
+ "duplication_factor": 0.1,
4
+ "nb_samples": 5544,
5
+ "nb_jobs": 7,
6
+ "nb_machines": 2,
7
+ "init_type": "exhaustive",
8
+ "data_path": "./ftd_20_02_2026_00_58_13",
9
+ "seed": 97,
10
+ "date_time": "20_02_2026_00_58_13"
11
+ }
datasets/d_18_02_2026_14_43_50_historic_netball/ftd_20_02_2026_00_58_13/pfsp_instance.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec110fb8e0df28666bd9adbde900bd67199d34c33b2f060a44a2d5746b4b82b
3
+ size 240
datasets/d_18_02_2026_14_43_50_historic_netball/ftd_20_02_2026_00_58_13/schedules.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a5221ad691e9eb37fdd730e3c66845c23fed5e15008f5ed09b8d62e7edd66a
3
+ size 155232
datasets/d_18_02_2026_14_43_50_historic_netball/script.bash ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Generate the exhaustive 7-job / 2-machine dataset in the current directory.
# Fixes: a space is required before each continuation backslash (otherwise the
# token and the next line can glue together), and the final line must not end
# with a dangling backslash.
python create_dataset.py --nb_jobs 7 \
    --nb_machines 2 \
    --time_min 1 \
    --time_max 10 \
    --nb_base_samples 1000 \
    --duplication_factor 0.1 \
    --init_type exhaustive \
    --data_folder_location ./
utils/create_dataset.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import json
4
+
5
+
6
def generate_random_pfsp_instance(nb_jobs, nb_machines, time_min, time_max, seed=None):
    """
    Generates a random instance of the Permutation Flow Shop Problem (PFSP).

    Parameters:
    - nb_jobs: Number of jobs (n).
    - nb_machines: Number of machines (m).
    - time_min: Minimum processing time for any job on any machine.
    - time_max: Maximum processing time for any job on any machine.
    - seed: Optional random seed; the default None preserves the previous
      behavior of leaving the global numpy RNG state untouched.
    Returns:
    - A 2D numpy array of shape (nb_jobs x nb_machines) where each entry is a
      random processing time between time_min and time_max (inclusive).
    """
    if seed is not None:
        np.random.seed(seed)
    # randint's upper bound is exclusive, hence time_max + 1
    return np.random.randint(time_min, time_max + 1, size=(nb_jobs, nb_machines))
18
+
19
+
20
def fit_palmer(pfsp_instance: np.ndarray):
    """
    Implements Palmer's heuristic (slope index) for the flowshop scheduling
    problem. Returns a schedule and its corresponding makespan.
    For now this wraps an old Palmer implementation; it should eventually be
    refactored to be cleaner and more efficient.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: np.int32 array of job indices representing the order of jobs (e.g., [0, 2, 1]).
        - makespan: np.int32 total completion time for the returned schedule.
    """

    # =====================================================================================
    # Legacy Palmer implementation, wrapped here until it gets refactored.
    class Palmer:
        def __init__(self, jobs_list: list):
            self.jobs_list = jobs_list
            self.nb_jobs = len(jobs_list)
            self.nb_machines = len(jobs_list[0])
            self.seq_star = None        # best sequence found by optim()
            self.make_span_star = None  # makespan of seq_star

        # Completion times of `job` on every machine, given the completion
        # times of the previously scheduled job (classic flowshop recursion).
        def cumulate(self, job: list, previous_cumul=None):
            res = [0] * len(job)
            if previous_cumul is None:
                res[0] = job[0]
                for i in range(1, len(job)):
                    res[i] = res[i - 1] + job[i]
            else:
                res[0] = previous_cumul[0] + job[0]
                for i in range(1, len(job)):
                    res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
            return res

        # Completion times after running a whole sequence of jobs.
        def cumulate_seq(self, seq: list):
            cumulated = None
            for i in seq:
                cumulated = self.cumulate(self.jobs_list[i], cumulated)
            return cumulated

        # Compute Palmer slope indices and order jobs by decreasing index.
        def optim(self, debug=False):
            jobs_weights = []
            for i, job in enumerate(self.jobs_list):
                weight = 0
                for j in range(self.nb_machines):
                    if debug:
                        print(
                            f">job {i} mach {j} first term: {(2*(j+1) - 1) - self.nb_machines}"
                        )
                        print(f">job {i} mach {j} second term: {job[j]}")
                        print(
                            "------------------------------------------------------------------"
                        )
                    # Palmer slope index: machines later in the line get
                    # positive coefficients, earlier ones negative.
                    weight += ((2 * (j + 1) - 1) - self.nb_machines) * job[j]
                if debug:
                    print(f"===>> job {i} weight: {weight}")
                jobs_weights.append((weight, i))

            # Descending tuple sort; ties on weight break toward the higher job index.
            self.seq_star = [tu[1] for tu in sorted(jobs_weights, reverse=True)]
            self.make_span_star = self.cumulate_seq(self.seq_star)[-1]

            return (self.seq_star, self.make_span_star)

    # =====================================================================================

    # Interfacing with the underlying old palmer code
    jobs_list = pfsp_instance.tolist()
    palmer_schedule, palmer_makespan = Palmer(jobs_list).optim()

    # Returning the schedule and makespan as numpy values of type int32
    return np.array(palmer_schedule, dtype=np.int32), np.int32(palmer_makespan)
97
+
98
+
99
+ def fit_cds(pfsp_instance: np.ndarray):
100
+ """
101
+ Implements CDS heuristic for the flowshop scheduling problem. Returns a schedule and its corresponding makespan.
102
+ For now I am using an old code that performs cds by interfacing with it, but it should be refactored to be cleaner and more efficient.
103
+ Parameters:
104
+ - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
105
+ Returns:
106
+ - A tuple (schedule, makespan) where:
107
+ - schedule: A list of job indices representing the order of jobs (e.g., [0, 2, 1]).
108
+ - makespan: The total completion time for the given schedule.
109
+ """
110
+
111
+ # =====================================================================================
112
+ # Function to cumulate job processing times
113
+ def cumulate(job, previous_cumul=None):
114
+ res = [0] * len(job)
115
+ if previous_cumul is None:
116
+ res[0] = job[0]
117
+ for i in range(1, len(job)):
118
+ res[i] = res[i - 1] + job[i]
119
+ else:
120
+ res[0] = previous_cumul[0] + job[0]
121
+ for i in range(1, len(job)):
122
+ res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
123
+ return res
124
+
125
+ # Function to cumulate processing times for a given sequence of jobs
126
+ def cumulate_seq(seq, jobs_list):
127
+ cumulated = None
128
+ for i in seq:
129
+ cumulated = cumulate(jobs_list[i], cumulated)
130
+ return cumulated
131
+
132
+ # Function to compute the makespan given a sequence of jobs and the job list
133
+ def makespan(sequence, job_list):
134
+ return cumulate_seq(sequence, job_list)[-1]
135
+
136
+ # Function to perform the Johnson's algorithm for the flow shop problem
137
+ def johnson_algorithm(matrix):
138
+ n = matrix.shape[0]
139
+ sequence = []
140
+ machines = [[], []]
141
+
142
+ # Preprocessing to determine the order of jobs
143
+ for i in range(n):
144
+ if matrix[i][0] < matrix[i][1]: # if time(m1) < time(m2)
145
+ machines[0].append((matrix[i][0], i))
146
+ else:
147
+ machines[1].append((matrix[i][1], i))
148
+
149
+ # Sorting jobs for each machine
150
+ machines[0] = sorted(
151
+ machines[0], key=lambda x: x[0]
152
+ ) # ascending sort for the first machine
153
+ machines[1] = sorted(
154
+ machines[1], key=lambda x: x[0], reverse=True
155
+ ) # descending sort for the second machine
156
+
157
+ # Merging the two sorted lists
158
+ merged = machines[0] + machines[1]
159
+
160
+ # Constructing the optimal sequence
161
+ sequence = [index for _, index in merged]
162
+
163
+ return sequence
164
+
165
+ # Function that applies Johnson's algorithm and computes the makespan
166
+ def johnson(job_matrix, data_matrix):
167
+ sequence = johnson_algorithm(job_matrix)
168
+ return sequence, makespan(sequence, data_matrix)
169
+
170
+ # CDS heuristic
171
+ def cds_heuristic(matrix):
172
+ n = matrix.shape[0]
173
+ m = matrix.shape[1]
174
+ best_makespan = float("inf")
175
+ best_sequences = []
176
+
177
+ # Step 1: Generate matrices of all possible job lists
178
+ for i in range(1, m):
179
+ machine_subset_1 = matrix[:, :i].sum(axis=1)
180
+ machine_subset_2 = matrix[:, -i:].sum(axis=1)
181
+ job_matrix = np.column_stack((machine_subset_1, machine_subset_2))
182
+
183
+ # Step 2: Apply Johnson's algorithm to the job matrix abd calculate the makespan
184
+ sequence, makespan_value = johnson(job_matrix, matrix)
185
+
186
+ # Step 3: Update the best makespan and corresponding sequences
187
+ if makespan_value < best_makespan:
188
+ best_makespan = makespan_value
189
+ best_sequences = [sequence]
190
+ elif makespan_value == best_makespan:
191
+ best_sequences.append(sequence)
192
+
193
+ return best_sequences[0], best_makespan
194
+
195
    # =====================================================================================

    # Interfacing with the underlying old cds code: run the CDS heuristic on
    # the raw (jobs x machines) processing-time matrix.
    cds_schedule, cds_makespan = cds_heuristic(pfsp_instance)

    # Returning the schedule and makespan as numpy arrays of type int32
    return np.array(cds_schedule, dtype=np.int32), np.int32(cds_makespan)
202
+
203
+
204
def fit_neh(pfsp_instance: np.ndarray):
    """
    Implements the NEH heuristic for the flowshop scheduling problem. Returns a schedule and its corresponding makespan.
    For now I am using an old code that performs neh by interfacing with it, but it should be refactored to be cleaner and more efficient.
    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: A numpy int32 array of job indices representing the order of jobs (e.g., [0, 2, 1]).
        - makespan: The total completion time (np.int32) for the given schedule.
    Raises:
    - ValueError: if the instance has fewer than 2 jobs.
    """

    # =====================================================================================
    # Legacy NEH implementation, wrapped below.
    # NOTE: the legacy code stores the matrix transposed (machines x jobs),
    # i.e. matrix[machine][job] — unlike pfsp_instance which is jobs x machines.
    class Inst:
        # Plain container for a PFSP instance in the legacy layout.
        # seed/ub/lb are metadata fields carried over from the legacy format;
        # they are not used by the NEH computation itself.
        def __init__(
            self,
            jobs: int,
            machines: int,
            seed: int,
            ub: int,
            lb: int,
            matrix: list[list[int]],
        ):
            self.jobs = jobs
            self.machines = machines
            self.seed = seed
            self.ub = ub
            self.lb = lb
            self.matrix = matrix

        def __repr__(self) -> str:
            return f"Inst(jobs={self.jobs}, machines={self.machines}, seed={self.seed}, ub={self.ub}, lb={self.lb}, matrix={self.matrix})"

    class NEH:
        def __init__(self, instance: Inst, debug: bool = False):
            self.instance = instance
            self.debug = debug

        def calculate_sj(self, job: int) -> int:
            # Total processing time of `job` across all machines (NEH sort key).
            sj = 0
            for machine in range(self.instance.machines):
                sj += self.instance.matrix[machine][job]
            return sj

        def sort_jobs(self, reverse: bool = False) -> list[int]:
            # Jobs ordered by total processing time, ascending by default.
            # NOTE(review): the classic NEH sorts by DECREASING total time;
            # this legacy code defaults to ascending — kept as-is to preserve
            # the behavior existing datasets were built with.
            return sorted(
                range(self.instance.jobs),
                key=lambda job: self.calculate_sj(job),
                reverse=reverse,
            )

        def emulate(self, jobs: list[int]) -> list[int]:
            # Simulate the flow shop for the given job order; returns the
            # completion time of the schedule on every machine.
            machines_exec = [0] * self.instance.machines
            for job in jobs:
                for current_machine in range(self.instance.machines):
                    # Add jobs execution time to current machine
                    machines_exec[current_machine] += self.instance.matrix[
                        current_machine
                    ][job]

                    # Sync other machines if they are behind current time
                    for machine in range(current_machine + 1, self.instance.machines):
                        machines_exec[machine] = max(
                            machines_exec[current_machine], machines_exec[machine]
                        )

            return machines_exec

        def calculate_cmax(self, jobs: list[int]) -> int:
            # Makespan = completion time on the last machine.
            return self.emulate(jobs)[-1]

        def get_best_order(self, orders: list[list[int]]) -> tuple[int, list[int]]:
            # Evaluate every candidate order; keep the first one with minimal Cmax.
            min_cmax = float("inf")
            min_order = None
            for order in orders:
                cmax = self.calculate_cmax(order)
                if cmax < min_cmax:
                    min_cmax = cmax
                    min_order = order

            return min_cmax, min_order

        def get_best_position(
            self, order: list[int], job: int
        ) -> tuple[int, list[int]]:
            # Try inserting `job` at every possible position of `order` and
            # return the best resulting (cmax, order).
            possible_orders: list[list[int]] = []
            for pos in range(len(order) + 1):
                possible_orders.append(order[:pos] + [job] + order[pos:])

            return self.get_best_order(possible_orders)

        def __call__(self) -> tuple[int, list[int]]:
            if self.instance.jobs < 2:
                # BUGFIX: the message previously said "greater than 2" although
                # the check accepts exactly 2 jobs.
                raise ValueError("Number of jobs must be at least 2")

            # Seed the partial schedule with the better ordering of the first
            # two jobs of the sorted list.
            sorted_jobs = self.sort_jobs()
            current_cmax, current_order = self.get_best_order(
                [sorted_jobs[:2], sorted_jobs[:2][::-1]]
            )

            if self.debug:
                print(current_cmax, current_order)

            if self.instance.jobs == 2:
                return current_cmax, current_order

            # Insert each remaining job at its best position in the partial order.
            for job in sorted_jobs[2:]:
                current_cmax, current_order = self.get_best_position(current_order, job)
                if self.debug:
                    print(current_cmax, current_order)

            return current_cmax, current_order

    # =====================================================================================

    # Interfacing with the underlying old neh code: transpose the instance to
    # the legacy (machines x jobs) layout.
    neh_instance_jobs = pfsp_instance.shape[0]
    neh_instance_machines = pfsp_instance.shape[1]
    neh_instance_matrix = pfsp_instance.T.tolist()
    neh_instance = Inst(
        neh_instance_jobs,
        neh_instance_machines,
        seed=0,
        ub=0,
        lb=0,
        matrix=neh_instance_matrix,
    )
    neh_makespan, neh_schedule = NEH(neh_instance)()

    # Returning the schedule and makespan as numpy arrays of type int32
    return np.array(neh_schedule, dtype=np.int32), np.int32(neh_makespan)
335
+
336
+
337
def evaluate_makespan(pfsp_instance, schedule):
    """
    Evaluates the makespan (completion time) of a given schedule for a given pfsp_instance.
    Parameters:
    - pfsp_instance: A list of lists (or 2D array), where pfsp_instance[i][j] is the processing time of job i on machine j.
    - schedule: A list/tuple indicating the order of jobs (e.g., [0, 2, 1]).
    Returns:
    - The makespan (total completion time) for the given schedule.
    """

    def cumulate(job: list, previous_cumul=None):
        # Calculate the cumulative completion times for a job, chained onto
        # the completion profile of the previously scheduled job (if any).

        res = [0] * len(job)
        # BUGFIX: use `is None` (identity check) instead of `== None` — the
        # equality form is un-idiomatic and misbehaves for array-like inputs
        # (elementwise comparison).
        if previous_cumul is None:
            res[0] = job[0]
            for i in range(1, len(job)):
                res[i] = res[i - 1] + job[i]
        else:
            # Each machine starts when both the previous machine for this job
            # and the previous job on this machine are finished.
            res[0] = previous_cumul[0] + job[0]
            for i in range(1, len(job)):
                res[i] = max(res[i - 1], previous_cumul[i]) + job[i]
        return res

    def cumulate_seq(pfsp_instance: list, schedule: list):
        # Calculates the cumulative time for a sequence of jobs on machines.

        cumulated = None
        for i in schedule:
            cumulated = cumulate(pfsp_instance[i], cumulated)
        return cumulated

    # Makespan = completion time on the last machine.
    cumulative = cumulate_seq(pfsp_instance, schedule)
    return cumulative[-1]
371
+
372
+
373
def create_dataset(
    pfsp_instance,
    nb_samples,
    init_type,
    data_folder_location,
    data_folder_name=None,
    seed=97
):
    """
    Builds a dataset of (schedule, makespan) samples for one PFSP instance and
    writes it to disk.

    Parameters:
    - pfsp_instance: 2D array where pfsp_instance[i][j] is the processing time of job i on machine j.
    - nb_samples: total number of samples to generate.
    - init_type: "cds" / "palmer" / "neh" (one heuristic seed plus random
      swap perturbations of it), "heuristics" (samples split roughly evenly
      across the three heuristics) or "random" (uniform random permutations).
    - data_folder_location: parent directory for the dataset folder.
    - data_folder_name: dataset folder name; defaults to a timestamped name.
    - seed: numpy RNG seed for reproducibility.

    Returns:
    - (schedules, makespans): int32 np.memmap arrays of shapes
      (nb_samples, nb_jobs) and (nb_samples,), flushed to disk.

    Raises:
    - ValueError: if init_type is not one of the supported values.

    Side effects: creates the dataset folder containing schedules.bin,
    makespans.bin, pfsp_instance.npy and metadata.json.
    """
    np.random.seed(seed)

    def perturb_schedule(schedule):
        # Swap two random distinct positions in a COPY of `schedule`.
        # BUGFIX: the original used `schedule[:]`, which on a numpy array is a
        # VIEW — the swap mutated the heuristic's base schedule in place and
        # corrupted every subsequent sample derived from it.
        perturbed_schedule = np.array(schedule)
        i, j = np.random.choice(perturbed_schedule.shape[0], size=2, replace=False)
        perturbed_schedule[[i, j]] = perturbed_schedule[[j, i]]
        return perturbed_schedule, evaluate_makespan(pfsp_instance, perturbed_schedule)

    def fill_segment(start, size, fit_function):
        # Fill rows [start, start + size): the heuristic's own schedule first,
        # followed by (size - 1) single-swap perturbations of it.
        base_schedule, base_makespan = fit_function(pfsp_instance)
        schedules[start] = base_schedule
        makespans[start] = base_makespan
        for row in range(start + 1, start + size):
            schedules[row], makespans[row] = perturb_schedule(base_schedule)

    heuristic_fitters = {"cds": fit_cds, "palmer": fit_palmer, "neh": fit_neh}
    # Validate init_type up front, before any filesystem side effects.
    if init_type not in heuristic_fitters and init_type not in ("heuristics", "random"):
        raise ValueError("Invalid initialization type")

    # Create the folder if it doesn't exist
    if data_folder_name is None:
        data_folder_name = f"ftdataset_{str(np.datetime64('now'))}"
    data_path = os.path.join(data_folder_location, data_folder_name)
    os.makedirs(data_path, exist_ok=True)

    # Create the np memmap files for schedules and makespans
    nb_jobs = pfsp_instance.shape[0]
    schedules = np.memmap(os.path.join(data_path, "schedules.bin"), dtype=np.int32, mode='w+', shape=(nb_samples, nb_jobs))
    makespans = np.memmap(os.path.join(data_path, "makespans.bin"), dtype=np.int32, mode='w+', shape=(nb_samples,))

    # Save the pfsp instance as a numpy file
    np.save(os.path.join(data_path, "pfsp_instance.npy"), pfsp_instance)

    # Create a metadata dictionary and save it as a json file
    metadata_dict = {
        "nb_samples": nb_samples,
        "nb_jobs": nb_jobs,
        "nb_machines": pfsp_instance.shape[1],
        "init_type": init_type,
        "data_path": data_path,
        "seed": seed,
        "date_time": str(np.datetime64('now'))
    }

    with open(os.path.join(data_path, "metadata.json"), "w") as f:
        json.dump(metadata_dict, f, indent=4)

    if init_type in heuristic_fitters:
        # Single heuristic: one seed sample, the rest perturbations.
        fill_segment(0, nb_samples, heuristic_fitters[init_type])

    elif init_type == "heuristics":
        # Split samples roughly evenly across the three heuristics; the last
        # segment absorbs the remainder. BUGFIX: the original index juggling
        # (`i += 1` after each for-loop) raised NameError whenever
        # nb_samples // 3 <= 1, because the loop variable was never bound.
        third = nb_samples // 3
        start = 0
        for fitter, size in zip(
            (fit_cds, fit_palmer, fit_neh),
            (third, third, nb_samples - 2 * third),
        ):
            if size > 0:
                fill_segment(start, size, fitter)
                start += size

    elif init_type == "random":
        # Independent uniform random permutations.
        for row in range(nb_samples):
            schedule = np.random.permutation(nb_jobs)
            schedules[row] = schedule
            makespans[row] = evaluate_makespan(pfsp_instance, schedule)

    # Persist the memmaps to disk before handing them back.
    schedules.flush()
    makespans.flush()
    return schedules, makespans