import numpy as np
import pandas as pd

# Seed NumPy's global RNG as soon as this module is imported.
# NOTE(review): nothing in this file draws random numbers; presumably the
# seed is meant for code that imports this module -- confirm.
np.random.seed(42)

# Small offset added to every standard deviation so that normalising a
# zero-variance column does not divide by zero.
epsilon = 1e-8


class Dataset:
    """
    Common behaviour for table-backed datasets.

    On construction the instance loads a DataFrame, asks the subclass which
    columns are inputs and which are outputs, and caches per-column mean and
    standard deviation for both groups.

    Subclasses must provide:
    - _load_dataframe(): return the pandas DataFrame to work on.
    - _get_columns(): return ``(input_columns, output_columns)``.
    """

    def __init__(self, inverse=False):
        """Load the data, resolve the column split and compute statistics.

        ``inverse`` is stored before the hooks run, so ``_get_columns`` can
        consult ``self.inverse`` when choosing the split.
        """
        self.inverse = inverse
        self.df = self._load_dataframe()
        column_split = self._get_columns()
        self.input_columns, self.output_columns = column_split
        self._compute_stats()

    def _load_dataframe(self):
        """Hook: return the DataFrame backing this dataset."""
        raise NotImplementedError

    def _get_columns(self):
        """Hook: return the pair ``(input_columns, output_columns)``."""
        raise NotImplementedError

    def _compute_stats(self):
        """Cache float32 mean/std vectors for the input and output columns.

        The standard deviation is whatever ``DataFrame.std()`` yields with
        its default arguments, shifted by ``epsilon``.
        """
        feature_frame = self.df[self.input_columns]
        self.input_mean = feature_frame.mean().to_numpy(dtype=np.float32)
        self.input_std = feature_frame.std().to_numpy(dtype=np.float32) + epsilon

        target_frame = self.df[self.output_columns]
        self.output_mean = target_frame.mean().to_numpy(dtype=np.float32)
        self.output_std = target_frame.std().to_numpy(dtype=np.float32) + epsilon

    def get_input(self, normalize=False):
        """Return the input columns as a float32 array, optionally normalised."""
        raw = self.df[self.input_columns].to_numpy(dtype=np.float32)
        return self.normalize_input(raw) if normalize else raw

    def get_output(self, normalize=False):
        """Return the output columns as a float32 array, optionally normalised."""
        raw = self.df[self.output_columns].to_numpy(dtype=np.float32)
        return self.normalize_output(raw) if normalize else raw

    def __str__(self):
        """Show the first rows of the underlying DataFrame."""
        preview = self.df.head()
        return str(preview)

    def normalize_input(self, input_data):
        """Standardise ``input_data`` with the cached input mean and std."""
        centered = input_data - self.input_mean
        return centered / self.input_std

    def normalize_output(self, output_data):
        """Standardise ``output_data`` with the cached output mean and std."""
        centered = output_data - self.output_mean
        return centered / self.output_std

    def denormalize_input(self, normalized_input):
        """Undo ``normalize_input``: rescale by the std, then add the mean."""
        rescaled = normalized_input * self.input_std
        return rescaled + self.input_mean

    def denormalize_output(self, normalized_output):
        """Undo ``normalize_output``: rescale by the std, then add the mean."""
        rescaled = normalized_output * self.output_std
        return rescaled + self.output_mean


class DataThermoforming(Dataset):
    """
    Thermoforming process data read from an Excel workbook.

    ``material`` selects the worksheet to read: "CFPEEK", "CFPA6", or "CFRP",
    the latter containing both materials.
    """

    def __init__(self, material="CFRP", inverse=False, filename="./Data/DataForThermoforming.xlsx"):
        """Remember the sheet/file to read, then let the base class load it."""
        # These attributes must exist before the base constructor runs,
        # because it immediately calls _load_dataframe().
        self.material = material
        self.filename = filename
        self.materials_map = {"CF/PEEK": 0.0, "CF/PA6": 1.0}
        super().__init__(inverse=inverse)

    def _load_dataframe(self):
        """Read the material's sheet and encode the "Materials" column as float32."""
        frame = pd.read_excel(self.filename, sheet_name=self.material)

        # NOTE: Series.map leaves NaN for any label missing from the mapping.
        encoded = frame["Materials"].map(self.materials_map)
        frame["Materials"] = encoded.astype(np.float32)

        if self.material in ("CFPEEK", "CFRP"):
            # NOTE(review): fixed row labels removed for these two sheets;
            # the reason is not recorded here (outliers?) -- confirm.
            rows_to_discard = [7, 78, 101, 129]
            frame = frame.drop(rows_to_discard, axis=0)
        return frame

    def _get_columns(self):
        """Return ``(input_columns, output_columns)`` for the chosen direction.

        Forward: laminate + process settings -> measured responses.
        Inverse: material + laminate + responses -> process settings.
        """
        laminate = ["Ply_Number", "Fiber_Volume_Fractions"]
        process_settings = [
            "Initial_Temp (degree celsius)",
            "Punch_Velocity (mm/s)",
            "Cooling_Time (s)",
        ]
        responses = ["A1(abs)", "B1(abs)", "C1(abs)", "Stress(Max) MPa"]

        if not self.inverse:
            # NOTE: "Materials" is only used as an input in inverse mode.
            return laminate + process_settings, responses

        return ["Materials"] + laminate + responses, process_settings


class DataAdditiveManufacturing(Dataset):
    """FDM simulation data read from the "Batch_1" sheet of an Excel workbook."""

    def __init__(self, inverse=False, filename="./Data/FDM_192_Simulation_Matrix_Shared.xlsx"):
        """Remember the file and label encodings, then let the base class load it."""
        # Set before the base constructor runs: it calls _load_dataframe().
        self.filename = filename
        self.material_base_map = {"HDPE": 0.0, "PP": 1.0}
        self.fiber_type_map = {"CF": 0.0, "GF": 1.0}
        self.build_direction_map = {"Vertical": 1.0, "Horizontal": 0.0}
        super().__init__(inverse=inverse)

    def _load_dataframe(self):
        """Read the sheet and encode the three categorical columns as float32."""
        frame = pd.read_excel(self.filename, sheet_name="Batch_1")

        encodings = (
            ("Material_Base", self.material_base_map),
            ("Fiber_Type", self.fiber_type_map),
            ("Build_Direction", self.build_direction_map),
        )
        # NOTE: Series.map leaves NaN for any label missing from its mapping.
        for column, mapping in encodings:
            frame[column] = frame[column].map(mapping).astype(np.float32)
        return frame

    def _get_columns(self):
        """Return ``(input_columns, output_columns)`` for the chosen direction."""
        composition = ["Material_Base", "Fiber_Type", "Vol_Fraction"]
        machine_settings = ["Extruder_Temp", "Velocity", "Bed_Temp"]
        shared_responses = [
            "Phi7_Change",
            "Phi8_Change",
            "Phi9_Change",
            "Global_Max_Stress",
        ]

        if not self.inverse:
            # NOTE: "Phi1_Change".."Phi3_Change" are not forward targets.
            forward_inputs = composition + ["Build_Direction"] + machine_settings
            return forward_inputs, shared_responses

        # NOTE: "Build_Direction" is not among the inverse targets.
        inverse_inputs = ["Phi1_Change", "Phi2_Change", "Phi3_Change"] + shared_responses
        return inverse_inputs, composition + machine_settings


if __name__ == "__main__":
    # Smoke run: load the additive-manufacturing data and report array shapes.
    dataset = DataAdditiveManufacturing()
    input_data = dataset.get_input(normalize=False)
    output_data = dataset.get_output(normalize=False)

    print("Input shape:", input_data.shape)
    print("Output shape:", output_data.shape)