younadi's picture
Rearranged utilitary scripts into utils/
cc98246
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import os
import json
def generate_random_pfsp_instance(nb_jobs, nb_machines, time_min, time_max):
    """
    Draws a random instance of the Permutation Flow Shop Problem (PFSP).

    The values come from numpy's global random generator, so seeding it with
    np.random.seed beforehand makes the instance reproducible.

    Parameters:
    - nb_jobs: Number of jobs (rows of the instance).
    - nb_machines: Number of machines (columns of the instance).
    - time_min: Smallest processing time that can be drawn (inclusive).
    - time_max: Largest processing time that can be drawn (inclusive).
    Returns:
    - A 2D numpy integer array of shape (nb_jobs, nb_machines) whose entries are
      processing times drawn between time_min and time_max, both included.
    """
    instance_shape = (nb_jobs, nb_machines)
    # randint excludes its upper bound, hence the +1 that makes time_max reachable.
    exclusive_upper_bound = time_max + 1
    return np.random.randint(
        low=time_min, high=exclusive_upper_bound, size=instance_shape
    )
def fit_palmer(pfsp_instance: np.ndarray):
    """
    Builds a schedule with Palmer's slope-index heuristic and evaluates its makespan.

    Every job i receives the slope index
        sum over machines j of (2 * (j + 1) - 1 - nb_machines) * pfsp_instance[i][j]
    where j is the 0-based machine index. Jobs are then sequenced by decreasing
    slope index; jobs with equal slope indices are ordered by decreasing job index.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
      Processing times are assumed to be non-negative.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: A 1D numpy array of dtype int32 holding the job indices in processing order (e.g., [0, 2, 1]).
        - makespan: A numpy int32 scalar, the completion time of the last job on the last machine.
    Raises:
    - ValueError: If pfsp_instance is not a non-empty 2D array.
    """
    times = np.asarray(pfsp_instance)
    if times.ndim != 2 or times.size == 0:
        raise ValueError(
            "pfsp_instance must be a non-empty 2D array (jobs x machines)"
        )

    # Work on plain Python numbers so the arithmetic cannot overflow a fixed-width dtype.
    durations = times.tolist()
    nb_machines = times.shape[1]

    # Palmer's coefficients grow with the machine index: negative for the first
    # machines, positive for the last ones.
    coefficients = [
        2 * (machine + 1) - 1 - nb_machines for machine in range(nb_machines)
    ]
    slope_indices = [
        sum(coefficient * duration for coefficient, duration in zip(coefficients, job))
        for job in durations
    ]

    # Decreasing slope index; the job index is part of the key so that ties are
    # resolved deterministically (larger index first).
    schedule = sorted(
        range(len(durations)),
        key=lambda job: (slope_indices[job], job),
        reverse=True,
    )

    # Completion-time recurrence: a job starts on a machine once both the machine
    # and the job's previous operation are finished.
    completion = [0] * nb_machines
    for job in schedule:
        ready = 0
        for machine, duration in enumerate(durations[job]):
            ready = max(ready, completion[machine]) + duration
            completion[machine] = ready

    return np.array(schedule, dtype=np.int32), np.int32(completion[-1])
def fit_cds(pfsp_instance: np.ndarray):
    """
    Builds a schedule with the CDS (Campbell, Dudek and Smith) heuristic and evaluates its makespan.

    For k = 1 .. nb_machines - 1 the instance is reduced to a two-machine problem whose
    first stage is the sum of the first k machines and whose second stage is the sum of
    the last k machines. Johnson's rule is applied to each reduced problem and the
    resulting sequence is evaluated on the full instance; the first sequence reaching
    the smallest makespan is returned.

    With a single machine there is no two-machine reduction to build; every permutation
    has the same makespan, so the jobs are returned in index order.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
      Processing times are assumed to be non-negative.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: A 1D numpy array of dtype int32 holding the job indices in processing order (e.g., [0, 2, 1]).
        - makespan: A numpy int32 scalar, the completion time of the last job on the last machine.
    Raises:
    - ValueError: If pfsp_instance is not a non-empty 2D array.
    """
    times = np.asarray(pfsp_instance)
    if times.ndim != 2 or times.size == 0:
        raise ValueError(
            "pfsp_instance must be a non-empty 2D array (jobs x machines)"
        )
    nb_jobs, nb_machines = times.shape
    durations = times.tolist()

    def sequence_makespan(sequence):
        # Completion-time recurrence over the full instance.
        completion = [0] * nb_machines
        for job in sequence:
            ready = 0
            for machine, duration in enumerate(durations[job]):
                ready = max(ready, completion[machine]) + duration
                completion[machine] = ready
        return completion[-1]

    def johnson_order(first_stage, second_stage):
        # Johnson's rule: jobs that are shorter on the first stage go first, by
        # increasing first-stage time; the others follow, by decreasing second-stage
        # time. Both sorts are stable, so ties keep the job-index order.
        front = sorted(
            (job for job in range(nb_jobs) if first_stage[job] < second_stage[job]),
            key=lambda job: first_stage[job],
        )
        back = sorted(
            (job for job in range(nb_jobs) if not first_stage[job] < second_stage[job]),
            key=lambda job: second_stage[job],
            reverse=True,
        )
        return front + back

    if nb_machines == 1:
        # No reduction exists; any order is optimal on one machine.
        identity = list(range(nb_jobs))
        return np.array(identity, dtype=np.int32), np.int32(sequence_makespan(identity))

    best_schedule = None
    best_makespan = None
    for k in range(1, nb_machines):
        first_stage = times[:, :k].sum(axis=1).tolist()
        second_stage = times[:, -k:].sum(axis=1).tolist()
        candidate = johnson_order(first_stage, second_stage)
        candidate_makespan = sequence_makespan(candidate)
        # Strict comparison: on ties the earliest reduction wins.
        if best_makespan is None or candidate_makespan < best_makespan:
            best_schedule, best_makespan = candidate, candidate_makespan

    return np.array(best_schedule, dtype=np.int32), np.int32(best_makespan)
def fit_neh(pfsp_instance: np.ndarray):
    """
    Builds a schedule with the NEH (Nawaz, Enscore and Ham) heuristic and evaluates its makespan.

    Steps:
    1. Jobs are ranked by decreasing total processing time (ties keep job-index order).
    2. The two highest-ranked jobs are tried in both orders and the better one is kept.
    3. Every remaining job, in rank order, is inserted at the position of the current
       partial sequence that yields the smallest makespan (first such position on ties).

    A single-job instance is returned as is.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
      Processing times are assumed to be non-negative.
    Returns:
    - A tuple (schedule, makespan) where:
        - schedule: A 1D numpy array of dtype int32 holding the job indices in processing order (e.g., [0, 2, 1]).
        - makespan: A numpy int32 scalar, the completion time of the last job on the last machine.
    Raises:
    - ValueError: If pfsp_instance is not a non-empty 2D array.
    """
    times = np.asarray(pfsp_instance)
    if times.ndim != 2 or times.size == 0:
        raise ValueError(
            "pfsp_instance must be a non-empty 2D array (jobs x machines)"
        )
    nb_jobs, nb_machines = times.shape
    durations = times.tolist()

    def sequence_makespan(sequence):
        # Completion-time recurrence, linear in the number of machines per job.
        completion = [0] * nb_machines
        for job in sequence:
            ready = 0
            for machine, duration in enumerate(durations[job]):
                ready = max(ready, completion[machine]) + duration
                completion[machine] = ready
        return completion[-1]

    def best_of(candidates):
        # Strict comparison: the first candidate reaching the minimum is kept.
        best_order = None
        best_cmax = None
        for order in candidates:
            cmax = sequence_makespan(order)
            if best_cmax is None or cmax < best_cmax:
                best_order, best_cmax = order, cmax
        return best_order, best_cmax

    # NEH inserts the longest jobs first; the stable sort keeps index order on ties.
    priority = sorted(
        range(nb_jobs), key=lambda job: sum(durations[job]), reverse=True
    )

    if nb_jobs == 1:
        current_order = priority
        current_cmax = sequence_makespan(priority)
    else:
        leading_pair = priority[:2]
        current_order, current_cmax = best_of([leading_pair, leading_pair[::-1]])
        for job in priority[2:]:
            insertions = [
                current_order[:position] + [job] + current_order[position:]
                for position in range(len(current_order) + 1)
            ]
            current_order, current_cmax = best_of(insertions)

    return np.array(current_order, dtype=np.int32), np.int32(current_cmax)
def create_dataset(
    pfsp_instance,
    nb_samples,
    init_type,
    data_folder_location,
    data_folder_name=None,
    seed=97
):
    """
    Generates a dataset of (schedule, makespan) samples for a PFSP instance and stores it on disk.

    The following files are written inside data_folder_location/data_folder_name:
    - schedules.bin: int32 memmap of shape (nb_samples, nb_jobs), one schedule per row.
    - makespans.bin: int32 memmap of shape (nb_samples,), the makespan of each schedule.
    - pfsp_instance.npy: the instance itself.
    - metadata.json: sample count, instance size, init_type, data path, seed and creation time.

    Parameters:
    - pfsp_instance: A 2D numpy array where pfsp_instance[i][j] is the processing time of job i on machine j.
    - nb_samples: Number of samples to generate.
    - init_type: How samples are produced:
        - "cds", "palmer" or "neh": the heuristic schedule followed by schedules obtained
          by swapping two random jobs of that heuristic schedule.
        - "heuristics": three consecutive segments (CDS, Palmer, NEH) of sizes
          nb_samples // 3, nb_samples // 3 and the remainder, each built as above.
        - "random": uniformly random permutations.
    - data_folder_location: Directory in which the dataset folder is created.
    - data_folder_name: Name of the dataset folder; defaults to "ftdataset_<current datetime>".
    - seed: Seed given to numpy's global random generator.
    Returns:
    - A tuple (schedules, makespans) with the two flushed numpy memmaps.
    Raises:
    - ValueError: If init_type is not one of the supported values. This is checked
      before anything is written to disk.
    """
    if init_type not in ("cds", "palmer", "neh", "heuristics", "random"):
        raise ValueError("Invalid initialization type")

    np.random.seed(seed)

    def perturb_schedule(schedule):
        # Copy explicitly: slicing a numpy array returns a view, so swapping in a
        # slice would silently modify the base heuristic schedule.
        perturbed_schedule = schedule.copy()
        i, j = np.random.choice(perturbed_schedule.shape[0], size=2, replace=False)
        perturbed_schedule[[i, j]] = perturbed_schedule[[j, i]]
        return perturbed_schedule, evaluate_makespan(pfsp_instance, perturbed_schedule)

    def fill_segment(start, size, fit_function):
        # Writes the heuristic schedule at row `start`, then size - 1 perturbations of it.
        if size <= 0:
            return
        base_schedule, base_makespan = fit_function(pfsp_instance)
        schedules[start] = base_schedule
        makespans[start] = base_makespan
        for row in range(start + 1, start + size):
            schedules[row], makespans[row] = perturb_schedule(base_schedule)

    # Create the folder if it doesn't exist
    if data_folder_name is None:
        data_folder_name = f"ftdataset_{str(np.datetime64('now'))}"
    data_path = os.path.join(data_folder_location, data_folder_name)
    os.makedirs(data_path, exist_ok=True)

    # Create the np memmap files for schedules and makespans
    nb_jobs = pfsp_instance.shape[0]
    schedules = np.memmap(
        os.path.join(data_path, "schedules.bin"),
        dtype=np.int32,
        mode='w+',
        shape=(nb_samples, nb_jobs),
    )
    makespans = np.memmap(
        os.path.join(data_path, "makespans.bin"),
        dtype=np.int32,
        mode='w+',
        shape=(nb_samples,),
    )

    # Save the pfsp instance as a numpy file
    np.save(os.path.join(data_path, "pfsp_instance.npy"), pfsp_instance)

    # Create a metadata dictionary and save it as a json file
    metadata_dict = {
        "nb_samples": nb_samples,
        "nb_jobs": nb_jobs,
        "nb_machines": pfsp_instance.shape[1],
        "init_type": init_type,
        "data_path": data_path,
        "seed": seed,
        "date_time": str(np.datetime64('now'))
    }
    with open(os.path.join(data_path, "metadata.json"), "w") as f:
        json.dump(metadata_dict, f, indent=4)

    if init_type == "random":
        for row in range(nb_samples):
            schedule = np.random.permutation(nb_jobs)
            schedules[row] = schedule
            makespans[row] = evaluate_makespan(pfsp_instance, schedule)
    elif init_type == "heuristics":
        # Explicit offsets keep the layout valid even when a segment is empty or
        # holds a single sample.
        cds_size = nb_samples // 3
        palmer_size = nb_samples // 3
        neh_size = nb_samples - cds_size - palmer_size
        fill_segment(0, cds_size, fit_cds)
        fill_segment(cds_size, palmer_size, fit_palmer)
        fill_segment(cds_size + palmer_size, neh_size, fit_neh)
    else:
        fit_by_name = {"cds": fit_cds, "palmer": fit_palmer, "neh": fit_neh}
        fill_segment(0, nb_samples, fit_by_name[init_type])

    schedules.flush()
    makespans.flush()
    return schedules, makespans
def evaluate_makespan(pfsp_instance, schedule):
    """
    Evaluates the makespan (completion time) of a given schedule for a given pfsp_instance.

    Jobs are processed in schedule order on machines 0, 1, ... in turn; an operation
    starts as soon as both its machine and the job's previous operation are finished.

    Parameters:
    - pfsp_instance: A list of lists or a 2D numpy array, where pfsp_instance[i][j] is the processing time of job i on machine j.
    - schedule: A sequence of job indices giving the processing order (e.g., [0, 2, 1]).
    Returns:
    - The makespan, i.e. the completion time of the last scheduled job on the last
      machine. An empty schedule has a makespan of 0.
    """
    # Completion times, on every machine, of the most recently scheduled job.
    completion = None
    for job_index in schedule:
        durations = pfsp_instance[job_index]
        nb_machines = len(durations)
        current = [0] * nb_machines
        if completion is None:
            # The first job never waits for a machine.
            current[0] = durations[0]
            for machine in range(1, nb_machines):
                current[machine] = current[machine - 1] + durations[machine]
        else:
            current[0] = completion[0] + durations[0]
            for machine in range(1, nb_machines):
                current[machine] = (
                    max(current[machine - 1], completion[machine]) + durations[machine]
                )
        completion = current

    if completion is None:
        return 0
    return completion[-1]
def plot_flowshop_gantt(T, schedule, save_path):
    """
    Plots a Gantt chart for a Permutation Flow Shop scheduling problem,
    including the Makespan (Termination Time), and saves it to save_path.

    Each row of the chart is a job; each colored bar is the job's operation on one
    machine. Hatched gray bars mark the time a job spends waiting, and a dashed red
    line marks the makespan. The figure is closed once saved, so repeated calls do
    not accumulate open figures.

    Parameters:
    - T: Matrix where T[i,j] is the processing time of job i on machine j.
    - schedule: A list/tuple indicating the order of jobs (e.g., [0, 2, 1]).
    - save_path: Path of the image file to write.
    Returns:
    - None.

    # --- Example Usage ---
    T_matrix = [
        [2, 5, 7],
        [12, 3, 8],
        [5, 20, 4]
    ]
    job_schedule = (0,2,1)
    plot_flowshop_gantt(T_matrix, job_schedule, "./presentation/schemas/gantt_0_2_1.png")
    """
    T = np.array(T)
    num_jobs, num_machines = T.shape

    # Organize data structures to store start and end times
    start_times = np.zeros((num_jobs, num_machines))
    end_times = np.zeros((num_jobs, num_machines))

    # --- 1. Scheduling Logic (Calculate Times) ---
    machine_avail_time = np.zeros(num_machines)
    job_avail_time = np.zeros(num_jobs)
    for job_idx in schedule:
        for machine_idx in range(num_machines):
            # A job can start only when:
            # A) The machine is free AND B) The job has finished on the previous machine
            start_t = max(machine_avail_time[machine_idx], job_avail_time[job_idx])
            duration = T[job_idx, machine_idx]
            end_t = start_t + duration
            start_times[job_idx, machine_idx] = start_t
            end_times[job_idx, machine_idx] = end_t
            machine_avail_time[machine_idx] = end_t
            job_avail_time[job_idx] = end_t

    # CALCULATE MAKESPAN
    makespan = np.max(end_times)

    # --- 2. Visualization (Gantt Chart) ---
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Colors for machines
        colors = plt.cm.tab10(np.linspace(0, 1, num_machines))

        # Plot "Working" bars
        for job_idx in range(num_jobs):
            for machine_idx in range(num_machines):
                start = start_times[job_idx, machine_idx]
                duration = T[job_idx, machine_idx]
                if duration > 0:
                    ax.barh(
                        y=job_idx,
                        width=duration,
                        left=start,
                        height=0.5,
                        color=colors[machine_idx],
                        edgecolor="black",
                        align="center",
                    )
                    # Label inside the bar
                    ax.text(
                        start + duration / 2,
                        job_idx,
                        f"M{machine_idx}",
                        ha="center",
                        va="center",
                        color="white",
                        fontweight="bold",
                        fontsize=9,
                    )

        # Plot "Waiting" times (gaps)
        for job_idx in range(num_jobs):
            # Gap before first machine
            if start_times[job_idx, 0] > 0:
                ax.barh(
                    job_idx,
                    start_times[job_idx, 0],
                    left=0,
                    height=0.2,
                    color="lightgray",
                    hatch="///",
                )
            # Gaps between machines
            for m in range(num_machines - 1):
                finish_prev = end_times[job_idx, m]
                start_next = start_times[job_idx, m + 1]
                if start_next > finish_prev:
                    ax.barh(
                        y=job_idx,
                        width=start_next - finish_prev,
                        left=finish_prev,
                        height=0.2,
                        color="lightgray",
                        hatch="///",
                        align="center",
                    )

        # --- 3. Add Makespan Line and Label ---
        # Draw vertical line
        ax.axvline(x=makespan, color="red", linestyle="--", linewidth=2, zorder=5)
        # Add text annotation near the top
        ax.text(
            makespan,
            num_jobs - 0.5,
            f" Makespan: {makespan}",
            color="red",
            fontweight="bold",
            va="bottom",
        )

        # Force the X-axis to include the exact Makespan value as a tick
        current_xticks = list(ax.get_xticks())
        # Add makespan to ticks if not too close to existing ones
        if not any(abs(x - makespan) < 0.5 for x in current_xticks):
            current_xticks.append(makespan)
            current_xticks.sort()
        # Filter out ticks that are way out of range (optional cleanup)
        current_xticks = [x for x in current_xticks if x >= 0 and x <= makespan * 1.1]
        ax.set_xticks(current_xticks)

        # Formatting
        ax.set_yticks(range(num_jobs))
        ax.set_yticklabels([f"Job {i}" for i in range(num_jobs)])
        ax.set_xlabel("Time")
        ax.set_ylabel("Jobs")
        ax.set_title(f"Flow Shop Schedule: {schedule} | Total Makespan: {makespan}")
        ax.grid(axis="x", linestyle="--", alpha=0.5)

        # Legend
        patches = [
            mpatches.Patch(color=colors[i], label=f"Machine {i}")
            for i in range(num_machines)
        ]
        patches.append(mpatches.Patch(facecolor="lightgray", hatch="///", label="Waiting"))
        patches.append(plt.Line2D([0], [0], color="red", linestyle="--", label="Makespan"))
        ax.legend(
            handles=patches, loc="upper left"
        )  # Moved legend to avoid covering makespan text

        fig.tight_layout()
        fig.savefig(save_path)  # Write the chart to the caller-provided path
    finally:
        # pyplot keeps every figure alive until it is closed explicitly; release it
        # even if plotting or saving failed.
        plt.close(fig)