|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
import os |
|
|
|
|
|
def load_and_process_data(data_path='CMaps/train_FD001.txt'):
    """
    Load and preprocess the NASA Turbofan (C-MAPSS) dataset.

    Reads the whitespace-delimited raw file, drops any all-NaN padding
    columns produced by trailing delimiters, and applies per-engine
    z-score normalization to every sensor channel.

    Parameters
    ----------
    data_path : str
        Path to a raw FD00x train/test text file.

    Returns
    -------
    tuple[pd.DataFrame, list[str]]
        The processed frame and the list of sensor column names.

    Raises
    ------
    FileNotFoundError
        If ``data_path`` does not exist.
    """
    print("Loading and processing data...")

    # 26 meaningful columns: unit id, cycle counter, 3 operational
    # settings, and 21 sensor measurements.
    columns = ['id', 'cycle', 'op1', 'op2', 'op3'] + [f'sensor{i}' for i in range(1, 22)]

    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data file {data_path} not found. Please download NASA Turbofan dataset.")

    # The raw C-MAPSS files are space-separated with trailing spaces on
    # every row.  sep=r'\s+' collapses runs of whitespace so the 26 names
    # line up with the 26 data fields (a literal sep=' ' creates extra
    # NaN columns and misaligns the supplied header names).
    df = pd.read_csv(data_path, sep=r'\s+', header=None, names=columns)

    # Drop only columns that are ENTIRELY NaN (padding artifacts).  The
    # previous how='any' default could silently drop a real sensor column
    # that contained a single missing value.
    df.dropna(axis=1, how='all', inplace=True)

    # Normalize ALL 21 sensor channels per engine unit.  (The original
    # range(1, 20) skipped sensors 20 and 21, leaving them raw while
    # load_processed_data still reported them as sensor columns.)
    # The epsilon guards against zero-variance (constant) sensors.
    sensor_cols = [f'sensor{i}' for i in range(1, 22)]
    df[sensor_cols] = df.groupby('id')[sensor_cols].transform(
        lambda x: (x - x.mean()) / (x.std() + 1e-6)
    )

    print(f"Processed data shape: {df.shape}")
    return df, sensor_cols
|
|
|
|
|
def save_processed_data(df, filepath='processed_data.csv'):
    """
    Persist a processed DataFrame to disk as CSV.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to write.
    filepath : str
        Destination path (default ``processed_data.csv``); the row
        index is not written.
    """
    df.to_csv(filepath, index=False)
    print(f"Processed data saved to {filepath}")
|
|
|
|
|
def load_processed_data(filepath='processed_data.csv'):
    """
    Load previously processed data from CSV.

    Parameters
    ----------
    filepath : str
        Path to the CSV written by :func:`save_processed_data`.

    Returns
    -------
    tuple
        ``(df, sensor_cols)`` on success, or ``(None, None)`` when
        *filepath* does not exist.
    """
    if not os.path.exists(filepath):
        return None, None

    frame = pd.read_csv(filepath)
    # Full 21-sensor column schema of the C-MAPSS files.
    sensors = [f'sensor{n}' for n in range(1, 22)]
    return frame, sensors
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: process the raw dataset and persist the result.
    # The broad except is a deliberate top-level boundary for CLI use.
    try:
        frame, _sensor_cols = load_and_process_data()
        save_processed_data(frame)
        print("Data processing completed successfully!")
    except Exception as exc:
        print(f"Error in data processing: {exc}")