File size: 1,723 Bytes
e573a4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import os
def load_and_process_data(data_path='CMaps/train_FD001.txt'):
    """
    Load and preprocess the NASA Turbofan dataset.

    The raw file is read as headerless, whitespace-delimited text with 26
    fields per row: ``id``, ``cycle``, ``op1``-``op3`` and ``sensor1``-
    ``sensor21``. Every sensor column is then standardized per engine
    (grouped by ``id``) to zero mean and unit sample standard deviation.

    Args:
        data_path: Path to the raw training file.

    Returns:
        A tuple ``(df, sensor_cols)``: the processed DataFrame and the list
        of sensor column names that were normalized.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
    """
    print("Loading and processing data...")

    # Define column names
    sensor_names = [f'sensor{i}' for i in range(1, 22)]
    columns = ['id', 'cycle', 'op1', 'op2', 'op3'] + sensor_names

    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data file {data_path} not found. Please download NASA Turbofan dataset.")

    # Split on runs of whitespace rather than a single space: trailing blanks
    # at the end of a row would otherwise become extra empty fields, leaving
    # more fields than names and misaligning the column labels.
    df = pd.read_csv(data_path, sep=r'\s+', header=None, names=columns)

    # Remove columns that are entirely empty; a column with only a few
    # missing readings is kept instead of being silently discarded.
    df = df.dropna(axis=1, how='all')

    # Normalize every sensor that is present (all 21, matching `columns`).
    sensor_cols = [name for name in sensor_names if name in df.columns]

    # Normalize sensor readings per engine
    if sensor_cols:
        per_engine = df.groupby('id')[sensor_cols]
        centered = df[sensor_cols] - per_engine.transform('mean')
        # The small epsilon keeps constant sensors from dividing by zero.
        df[sensor_cols] = centered / (per_engine.transform('std') + 1e-6)

    print(f"Processed data shape: {df.shape}")
    return df, sensor_cols
def save_processed_data(df, filepath='processed_data.csv'):
    """
    Write the processed DataFrame to ``filepath`` as CSV.

    The row index is not written, so the file contains only the DataFrame's
    columns. A confirmation line is printed once the write returns.
    """
    # Keep the writer options in one place so the call site stays short.
    writer_options = {'index': False}
    df.to_csv(filepath, **writer_options)

    confirmation = f"Processed data saved to {filepath}"
    print(confirmation)
def load_processed_data(filepath='processed_data.csv'):
    """
    Load processed data from CSV.

    Args:
        filepath: Path to a CSV file of processed data.

    Returns:
        A tuple ``(df, sensor_cols)``. ``sensor_cols`` lists, in numeric
        order, the ``sensor1``-``sensor21`` columns that are actually present
        in the file. Returns ``(None, None)`` when ``filepath`` does not
        exist.
    """
    if not os.path.exists(filepath):
        return None, None

    df = pd.read_csv(filepath)

    # Report only sensors that exist in the file, so callers can index
    # df[sensor_cols] without a KeyError when some sensor columns are absent.
    available = set(df.columns)
    sensor_cols = [
        name
        for name in (f'sensor{i}' for i in range(1, 22))
        if name in available
    ]
    return df, sensor_cols
if __name__ == "__main__":
    # Manual smoke test: run the full load -> normalize -> save pipeline with
    # the default paths and report the outcome on stdout.
    try:
        processed = load_and_process_data()
        # Only the DataFrame (first element) is persisted; the sensor column
        # list is not needed here.
        save_processed_data(processed[0])
        print("Data processing completed successfully!")
    except Exception as err:
        # Script boundary: report any failure instead of a traceback.
        print(f"Error in data processing: {err}")
|