import os

import numpy as np
import pandas as pd


def load_and_process_data(data_path='CMaps/train_FD001.txt'):
    """
    Load and preprocess the NASA Turbofan (CMAPSS) dataset.

    Reads the whitespace-separated raw file, drops the all-NaN padding
    columns that ``sep=' '`` produces from trailing spaces in the raw
    files, and z-score normalizes every sensor channel per engine.

    Parameters
    ----------
    data_path : str
        Path to the raw train_FD001.txt file.

    Returns
    -------
    tuple[pandas.DataFrame, list[str]]
        The processed frame and the list of normalized sensor columns.

    Raises
    ------
    FileNotFoundError
        If ``data_path`` does not exist.
    """
    print("Loading and processing data...")

    # Column layout of the CMAPSS files: engine id, cycle counter,
    # three operational settings, then 21 sensor channels.
    columns = ['id', 'cycle', 'op1', 'op2', 'op3'] + [f'sensor{i}' for i in range(1, 22)]

    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data file {data_path} not found. Please download NASA Turbofan dataset.")

    df = pd.read_csv(data_path, sep=' ', header=None, names=columns)
    df.dropna(axis=1, inplace=True)  # Remove extra NaN columns

    # Normalize sensor readings per engine.
    # BUG FIX: the original used range(1, 20), normalizing only
    # sensor1..sensor19 and silently skipping sensor20/sensor21 — it also
    # disagreed with the 21-sensor list returned by load_processed_data().
    sensor_cols = [f'sensor{i}' for i in range(1, 22)]
    # The small epsilon guards against division by zero for sensors that
    # are constant within an engine (std == 0).
    df[sensor_cols] = df.groupby('id')[sensor_cols].transform(
        lambda x: (x - x.mean()) / (x.std() + 1e-6)
    )

    print(f"Processed data shape: {df.shape}")
    return df, sensor_cols


def save_processed_data(df, filepath='processed_data.csv'):
    """
    Save processed data to CSV (without the index column).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to persist.
    filepath : str
        Destination CSV path.
    """
    df.to_csv(filepath, index=False)
    print(f"Processed data saved to {filepath}")


def load_processed_data(filepath='processed_data.csv'):
    """
    Load previously processed data from CSV.

    Parameters
    ----------
    filepath : str
        Source CSV path.

    Returns
    -------
    tuple[pandas.DataFrame, list[str]] or tuple[None, None]
        The frame and the 21 sensor column names, or ``(None, None)``
        when the file does not exist (best-effort cache lookup).
    """
    if not os.path.exists(filepath):
        return None, None
    df = pd.read_csv(filepath)
    sensor_cols = [f'sensor{i}' for i in range(1, 22)]
    return df, sensor_cols


if __name__ == "__main__":
    # Smoke-test the data processor end to end.
    try:
        df, sensor_cols = load_and_process_data()
        save_processed_data(df)
        print("Data processing completed successfully!")
    except Exception as e:
        # Top-level boundary: report and exit rather than crash with a trace.
        print(f"Error in data processing: {e}")