| |
| import numpy as np |
| import pandas as pd |
| import matplotlib.pyplot as plt |
| from sklearn.decomposition import PCA |
|
|
def generate_complex_tokamak_data(n_samples=5000, n_features=50, seed=42):
    """
    Simulate complex, multi-modal Tokamak sensor data with labelled anomalies.

    Healthy samples (label 0) come from three isotropic Gaussian modes,
    generated in this order:
      1. Ramp-up   ('The Takeoff'): int(0.25 * n_samples) rows,
         every feature has mean -2.0 and variance 0.6.
      2. Flat-top  ('The Cruise'):  int(0.50 * n_samples) rows,
         every feature has mean 0.0 and variance 0.3.
      3. Ramp-down ('The Landing'): the remaining rows,
         every feature has mean +2.0 and variance 0.6.

    Anomalous samples (label 1) are generated IN ADDITION to the healthy
    ones: int(0.1 * n_samples) rows, of which int(60%) form a tight cluster
    at mean -1.0 (variance 0.15) and the rest sit at mean 3.5 (variance 0.5).
    The returned frame therefore has n_samples + int(0.1 * n_samples) rows.

    Args:
        n_samples: Number of healthy rows to generate.
        n_features: Number of sensor columns ('sensor_0' .. 'sensor_{n-1}').
        seed: Seed for the private random generator. The default (42)
            reproduces the same stream the global NumPy generator yields
            after ``np.random.seed(42)``. Pass ``None`` for a different
            dataset on every call.

    Returns:
        pandas.DataFrame with the sensor columns plus an integer 'label'
        column (0 = healthy, 1 = anomalous), rows shuffled and re-indexed
        from 0.
    """
    print(f"--- GENERATING PHYSICS-INFORMED DATA ({n_samples} samples) ---")

    # Use a private legacy RandomState rather than np.random.seed(): the
    # numbers drawn are the same for a given seed, but the caller's global
    # NumPy random state is left untouched.
    rng = np.random.RandomState(seed)

    def _cluster(center, variance, count):
        # One isotropic Gaussian blob: `count` rows centred on `center`
        # in every feature, with covariance `variance` * identity.
        mean = np.full(n_features, center)
        cov = np.eye(n_features) * variance
        return rng.multivariate_normal(mean, cov, count)

    # --- Healthy operating modes -------------------------------------
    # NOTE: the draw order below (ramp-up, flat-top, ramp-down, then the
    # two anomaly clusters) determines the generated values; keep it.
    n_ramp_up = int(0.25 * n_samples)
    n_flat_top = int(0.50 * n_samples)
    # Whatever is left after truncation goes to ramp-down, so the healthy
    # total is exactly n_samples.
    n_ramp_down = n_samples - n_ramp_up - n_flat_top

    X_healthy = np.vstack([
        _cluster(-2.0, 0.6, n_ramp_up),
        _cluster(0.0, 0.3, n_flat_top),
        _cluster(2.0, 0.6, n_ramp_down),
    ])

    # --- Anomalies (an extra 10% on top of the healthy samples) -------
    n_anom = int(n_samples * 0.1)
    n_anom_1 = int(n_anom * 0.6)
    n_anom_2 = n_anom - n_anom_1

    X_anomalous = np.vstack([
        # Tight cluster located between the ramp-up and flat-top centres.
        _cluster(-1.0, 0.15, n_anom_1),
        # Cluster located beyond the ramp-down centre.
        _cluster(3.5, 0.5, n_anom_2),
    ])

    # --- Assemble the labelled frame ----------------------------------
    columns = [f'sensor_{i}' for i in range(n_features)]

    df_healthy = pd.DataFrame(X_healthy, columns=columns)
    df_healthy['label'] = 0

    df_anomalous = pd.DataFrame(X_anomalous, columns=columns)
    df_anomalous['label'] = 1

    df_total = pd.concat([df_healthy, df_anomalous], ignore_index=True)

    # Shuffle with the same private generator so the row order is also
    # reproducible for a given seed.
    df_total = df_total.sample(frac=1, random_state=rng).reset_index(drop=True)

    print(f"Generated {len(df_healthy)} healthy and {len(df_anomalous)} anomalous samples.")
    return df_total
|
|
if __name__ == "__main__":
    # Build the benchmark dataset with the default settings and persist it.
    dataset = generate_complex_tokamak_data()
    dataset.to_csv('complex_tokamak_data.csv', index=False)
    print("Saved to 'complex_tokamak_data.csv'")

    # Project the sensor columns onto their first two principal components
    # so the cluster layout can be inspected in a 2-D scatter plot.
    print("Generating preview plot...")
    labels = dataset['label']
    sensors = dataset.drop(columns='label')
    projected = PCA(n_components=2).fit_transform(sensors)

    plt.figure(figsize=(10, 8))

    # (label value, colour, opacity, legend entry) per class. Healthy points
    # are drawn first so the anomalies are painted on top of them.
    scatter_specs = (
        (0, 'blue', 0.2, 'Healthy (3 Modes)'),
        (1, 'red', 0.6, 'Anomalies (The Trap)'),
    )
    for label_value, colour, opacity, legend_text in scatter_specs:
        selected = labels == label_value
        plt.scatter(
            projected[selected, 0],
            projected[selected, 1],
            c=colour,
            alpha=opacity,
            s=10,
            label=legend_text,
        )

    plt.title("V2 Benchmark Data: The 'Three Islands' Topology")
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Write the figure to disk rather than showing it interactively.
    plt.savefig('v2_data_topology.png')
    print("Plot saved to 'v2_data_topology.png'. Open it to see the 'Islands'.")