# Save this as physics_data.py
"""Synthetic, multi-modal "Tokamak" benchmark data for anomaly detection.

Importing this module only requires NumPy and pandas.  Running it as a
script additionally writes the data set to ``complex_tokamak_data.csv`` and
a 2-D PCA preview to ``v2_data_topology.png`` (this part needs matplotlib
and scikit-learn).
"""

import numpy as np
import pandas as pd


def _sample_cluster(rng, center, variance, count, n_features):
    """Draw ``count`` points from an isotropic Gaussian.

    Every coordinate of the mean equals ``center`` and the covariance is
    ``variance * I``.  Returns an array of shape ``(count, n_features)``.
    """
    mean = np.full(n_features, center, dtype=float)
    cov = np.eye(n_features) * variance
    return rng.multivariate_normal(mean, cov, count)


def generate_complex_tokamak_data(n_samples=5000, n_features=50, seed=42):
    """
    Simulates complex, multi-modal Tokamak data.

    Healthy modes (label 0, ``n_samples`` rows in total):
    1. Ramp-up   (25%, mean -2.0, variance 0.6, 'The Takeoff')
    2. Flat-top  (50%, mean  0.0, variance 0.3, 'The Cruise')
    3. Ramp-down (remaining rows, mean +2.0, variance 0.6, 'The Landing')

    Anomalies (label 1, ``int(n_samples * 0.1)`` extra rows):
    1. "Bridge Gap" (60% of anomalies, mean -1.0, variance 0.15): a tight
       cluster placed between the ramp-up and flat-top modes.
    2. "Hard Disruption" (the rest, mean 3.5, variance 0.5): placed beyond
       the ramp-down mode.

    Args:
        n_samples: Number of healthy rows to generate (must be >= 0).
        n_features: Number of ``sensor_<i>`` columns (must be >= 1).
        seed: Seed for the private random generator.  The default (42)
            keeps the output reproducible between calls; ``None`` asks
            NumPy for a non-deterministic seed.

    Returns:
        A shuffled ``pandas.DataFrame`` with columns ``sensor_0`` ...
        ``sensor_{n_features - 1}`` followed by an integer ``label`` column
        (0 = healthy, 1 = anomalous).

    Raises:
        ValueError: If ``n_samples`` is negative or ``n_features`` < 1.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")
    if n_features < 1:
        raise ValueError("n_features must be at least 1")

    print(f"--- GENERATING PHYSICS-INFORMED DATA ({n_samples} samples) ---")

    # A private legacy RandomState yields the same stream that
    # ``np.random.seed(seed)`` would, without reseeding NumPy's global
    # generator behind the caller's back.
    rng = np.random.RandomState(seed)

    # --- 1. HEALTHY DATA (The "Three Phases") ---
    # The draw order (A, B, C, then the anomalies, then the shuffle) is
    # part of the reproducibility contract: do not reorder these calls.
    n_a = int(0.25 * n_samples)
    n_b = int(0.50 * n_samples)
    n_c = n_samples - n_a - n_b  # remainder, so the three modes sum exactly

    # Mode A: Ramp-Up - noisy cluster.
    data_a = _sample_cluster(rng, -2.0, 0.6, n_a, n_features)
    # Mode B: Flat-Top - the tight, stable core.
    data_b = _sample_cluster(rng, 0.0, 0.3, n_b, n_features)
    # Mode C: Ramp-Down - mirror image of the ramp-up cluster.
    data_c = _sample_cluster(rng, 2.0, 0.6, n_c, n_features)

    # Stack them to create the "Healthy Manifold".
    X_healthy = np.vstack([data_a, data_b, data_c])

    # --- 2. ANOMALOUS DATA (The "Trap") ---
    n_anom = int(n_samples * 0.1)  # anomalies are 10% of the healthy count
    n_anom_1 = int(n_anom * 0.6)
    n_anom_2 = n_anom - n_anom_1

    # Type 1: The "Bridge Gap" - points hidden BETWEEN modes A and B.
    data_bad_1 = _sample_cluster(rng, -1.0, 0.15, n_anom_1, n_features)
    # Type 2: The "Hard Disruption" - well outside the healthy clusters.
    data_bad_2 = _sample_cluster(rng, 3.5, 0.5, n_anom_2, n_features)

    X_anomalous = np.vstack([data_bad_1, data_bad_2])

    # --- 3. DATAFRAME CREATION ---
    sensor_columns = [f'sensor_{i}' for i in range(n_features)]

    df_healthy = pd.DataFrame(X_healthy, columns=sensor_columns)
    df_healthy['label'] = 0  # 0 = Healthy

    df_anomalous = pd.DataFrame(X_anomalous, columns=sensor_columns)
    df_anomalous['label'] = 1  # 1 = Anomalous

    df_total = pd.concat([df_healthy, df_anomalous], ignore_index=True)

    # Shuffle the deck with the same generator so the row order is
    # reproducible for a given seed.
    df_total = df_total.sample(frac=1, random_state=rng).reset_index(drop=True)

    print(f"Generated {len(df_healthy)} healthy and {len(df_anomalous)} anomalous samples.")
    return df_total


def _save_preview_plot(df, path='v2_data_topology.png'):
    """Project ``df`` to 2-D with PCA and save a healthy-vs-anomaly scatter.

    ``df`` must contain a ``label`` column (0 = healthy, 1 = anomalous);
    every other column is used as a PCA input feature.
    """
    # Imported here so that merely importing this module (to call the
    # generator) does not require the plotting stack to be installed.
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA

    features = df.drop('label', axis=1)
    labels = df['label']
    projected = PCA(n_components=2).fit_transform(features)

    healthy_mask = (labels == 0).to_numpy()
    anomalous_mask = (labels == 1).to_numpy()

    fig = plt.figure(figsize=(10, 8))
    # Plot Healthy (Blue)
    plt.scatter(projected[healthy_mask, 0], projected[healthy_mask, 1],
                c='blue', alpha=0.2, s=10, label='Healthy (3 Modes)')
    # Plot Anomalous (Red)
    plt.scatter(projected[anomalous_mask, 0], projected[anomalous_mask, 1],
                c='red', alpha=0.6, s=10, label='Anomalies (The Trap)')

    plt.title("V2 Benchmark Data: The 'Three Islands' Topology")
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig(path)
    # Release the figure; pyplot otherwise keeps it alive until exit.
    plt.close(fig)


def main():
    """Generate the data set, save it as CSV and write the PCA preview."""
    # Generate and Save
    df = generate_complex_tokamak_data()
    df.to_csv('complex_tokamak_data.csv', index=False)
    print("Saved to 'complex_tokamak_data.csv'")

    # --- VISUAL PROOF ---
    # We use PCA to project the 50D data to 2D so you can SEE the islands
    print("Generating preview plot...")
    _save_preview_plot(df, 'v2_data_topology.png')
    print("Plot saved to 'v2_data_topology.png'. Open it to see the 'Islands'.")


if __name__ == "__main__":
    main()