"""Anomaly-detection baselines on PCA-reduced tokamak shot data.

Pipeline, in order:

1. Load ``real_tokamak_data.csv`` and separate the ``label`` column from the
   feature columns (``shot_id`` is dropped and never used as a feature).
2. Clean the features: neutralise infinities, drop zero-variance columns,
   fill missing values with 0.
3. Fit a scaler and a PCA on rows labelled 0 only, then project a test set
   made of held-out label-0 rows plus every label-1 row.
4. Fit Isolation Forest and One-Class SVM on the training projection and
   report ROC AUC on the test set.
5. Save the projected arrays to ``pca_dataset.npz``.
"""

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM

# ---------------------------------------------------------------------------
# 1. Load
# ---------------------------------------------------------------------------
print("Loading data...")
df = pd.read_csv('real_tokamak_data.csv')

# 'label' is the target; 'shot_id' is removed so it cannot act as a feature.
y = df['label']
X = df.drop(['label', 'shot_id'], axis=1)

print(f"Original Shape: {X.shape}")

# ---------------------------------------------------------------------------
# 2. Clean
# ---------------------------------------------------------------------------
# Infinities must be replaced BEFORE the variance filter and the imputer:
# a column containing +/-inf has a NaN standard deviation, so `X.std() > 0`
# would be False for it and the column would be thrown away as if it were a
# dead sensor, and SimpleImputer refuses infinite input outright.
X = X.replace([np.inf, -np.inf], 0)

# Drop columns whose standard deviation is not strictly positive
# (constant columns, and all-NaN columns whose std is NaN).
X = X.loc[:, X.std() > 0]
print(f"Shape after dropping dead sensors: {X.shape}")

# Fill remaining missing values with 0. The original index is kept so the
# boolean masks built from `y` below align row-for-row with X.
imputer = SimpleImputer(strategy='constant', fill_value=0)
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns, index=X.index)

# ---------------------------------------------------------------------------
# 3. Split: train on label-0 rows only
# ---------------------------------------------------------------------------
X_healthy = X[y == 0]
X_train, X_test_healthy = train_test_split(X_healthy, test_size=0.2, random_state=42)

# Test set = held-out label-0 rows followed by every label-1 row; y_test is
# built in the same order (0s first, then 1s).
X_disruptive = X[y == 1]
X_test = pd.concat([X_test_healthy, X_disruptive])
y_test = np.concatenate([np.zeros(len(X_test_healthy)), np.ones(len(X_disruptive))])

# ---------------------------------------------------------------------------
# 4. Scale + PCA (both fitted on the training rows only)
# ---------------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA cannot extract more components than min(n_samples, n_features), so cap
# the requested 10 by both dimensions of the training matrix.
n_components = min(10, *X_train_scaled.shape)
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print(f"Data Prepared. Training on {len(X_train_pca)} healthy shots.")
print(f"Testing on {len(X_test_pca)} shots ({len(X_disruptive)} disruptions).")

# ---------------------------------------------------------------------------
# 5. Isolation Forest
# ---------------------------------------------------------------------------
print("\nRunning Isolation Forest...")
iso = IsolationForest(contamination=0.1, random_state=42)
iso.fit(X_train_pca)
# score_samples is lower for more abnormal points; negate it so that a higher
# score means "more anomalous", matching y_test where 1 marks a disruption.
y_pred_iso = -iso.score_samples(X_test_pca)
auc_iso = roc_auc_score(y_test, y_pred_iso)
print(f" >> Isolation Forest AUC: {auc_iso:.4f}")

# ---------------------------------------------------------------------------
# 6. One-Class SVM
# ---------------------------------------------------------------------------
print("Running One-Class SVM...")
svm = OneClassSVM(nu=0.1)
svm.fit(X_train_pca)
# Same sign convention as above: negate so larger values mean more anomalous.
y_pred_svm = -svm.score_samples(X_test_pca)
auc_svm = roc_auc_score(y_test, y_pred_svm)
print(f" >> One-Class SVM AUC: {auc_svm:.4f}")

# ---------------------------------------------------------------------------
# 7. Persist the projected dataset
# ---------------------------------------------------------------------------
np.savez('pca_dataset.npz', X_train=X_train_pca, X_test=X_test_pca, y_test=y_test)
print("\n✅ PCA Data saved to 'pca_dataset.npz'.")