# QGAN_Project / Phase 1 / physics_data
# Uploaded by 1bnjmn3
# Commit message: Add files using upload-large-folder tool
# Commit c5f84b2 (verified)
# Save this as physics_data.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
def _sample_isotropic_cluster(center, variance, count, n_features):
    """Draw `count` points from N(center * 1, variance * I) using the global NumPy RNG."""
    mean = np.full(n_features, float(center))
    cov = np.eye(n_features) * variance
    return np.random.multivariate_normal(mean, cov, count)


def generate_complex_tokamak_data(n_samples=5000, n_features=50, seed=42,
                                  anomaly_rate=0.1):
    """
    Simulate multi-modal, labelled "tokamak" sensor data for anomaly detection.

    Healthy samples (label 0) are drawn from three isotropic Gaussian modes:
      1. Ramp-up   (25%): every feature centred at -2.0, variance 0.6
      2. Flat-top  (50%): every feature centred at  0.0, variance 0.3
      3. Ramp-down (rest): every feature centred at +2.0, variance 0.6

    Anomalous samples (label 1) are generated *in addition to* the healthy
    ones, ``int(n_samples * anomaly_rate)`` of them:
      - "Bridge gap" (60%): centred at -1.0, variance 0.15, i.e. a tight
        cluster sitting between the ramp-up and flat-top modes.
      - "Hard disruption" (rest): centred at +3.5, variance 0.5, outside the
        healthy range.

    Parameters
    ----------
    n_samples : int
        Number of healthy samples. The returned frame has
        ``n_samples + int(n_samples * anomaly_rate)`` rows.
    n_features : int
        Number of sensor columns (``sensor_0`` ... ``sensor_{n_features-1}``).
    seed : int or None
        Value passed to ``np.random.seed`` before sampling. NOTE: this reseeds
        the *global* NumPy RNG, which is also what makes the final shuffle
        reproducible. Pass ``None`` to leave the global RNG state untouched
        (the output is then not reproducible).
    anomaly_rate : float
        Number of anomalies as a fraction of ``n_samples``.

    Returns
    -------
    pandas.DataFrame
        Shuffled frame with the sensor columns plus an integer ``label``
        column (0 = healthy, 1 = anomalous).

    Raises
    ------
    ValueError
        If ``n_samples`` or ``anomaly_rate`` is negative.
    """
    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}")
    if anomaly_rate < 0:
        raise ValueError(f"anomaly_rate must be non-negative, got {anomaly_rate}")

    print(f"--- GENERATING PHYSICS-INFORMED DATA ({n_samples} samples) ---")
    if seed is not None:
        np.random.seed(seed)

    # --- 1. HEALTHY DATA (the three operational phases) ---
    # The draw order (A, B, C, bad_1, bad_2) is kept fixed so that a given
    # seed always yields the same dataset.
    n_a = int(0.25 * n_samples)
    n_b = int(0.50 * n_samples)
    n_c = n_samples - n_a - n_b  # remainder, so the three modes sum to n_samples

    data_a = _sample_isotropic_cluster(-2.0, 0.6, n_a, n_features)  # ramp-up
    data_b = _sample_isotropic_cluster(0.0, 0.3, n_b, n_features)   # flat-top
    data_c = _sample_isotropic_cluster(2.0, 0.6, n_c, n_features)   # ramp-down
    X_healthy = np.vstack([data_a, data_b, data_c])

    # --- 2. ANOMALOUS DATA ---
    n_anom = int(n_samples * anomaly_rate)
    n_anom_1 = int(n_anom * 0.6)
    n_anom_2 = n_anom - n_anom_1

    # Type 1: tight cluster hidden in the gap between ramp-up and flat-top.
    data_bad_1 = _sample_isotropic_cluster(-1.0, 0.15, n_anom_1, n_features)
    # Type 2: cluster well beyond the ramp-down mode.
    data_bad_2 = _sample_isotropic_cluster(3.5, 0.5, n_anom_2, n_features)
    X_anomalous = np.vstack([data_bad_1, data_bad_2])

    # --- 3. DATAFRAME CREATION ---
    sensor_columns = [f'sensor_{i}' for i in range(n_features)]

    df_healthy = pd.DataFrame(X_healthy, columns=sensor_columns)
    df_healthy['label'] = 0  # 0 = Healthy
    df_anomalous = pd.DataFrame(X_anomalous, columns=sensor_columns)
    df_anomalous['label'] = 1  # 1 = Anomalous

    df_total = pd.concat([df_healthy, df_anomalous], ignore_index=True)
    # Shuffle rows; with random_state unset pandas draws from the global
    # NumPy RNG, so the permutation is governed by `seed` as well.
    df_total = df_total.sample(frac=1).reset_index(drop=True)

    print(f"Generated {len(df_healthy)} healthy and {len(df_anomalous)} anomalous samples.")
    return df_total
if __name__ == "__main__":
    # Build the benchmark dataset and write it to disk.
    df = generate_complex_tokamak_data()
    df.to_csv('complex_tokamak_data.csv', index=False)
    print("Saved to 'complex_tokamak_data.csv'")

    # --- VISUAL PROOF ---
    # Project the sensor columns onto their first two principal components
    # so the cluster layout can be inspected in a 2D scatter plot.
    print("Generating preview plot...")
    labels = df['label']
    sensor_values = df.drop(columns='label')
    projected = PCA(n_components=2).fit_transform(sensor_values)

    fig, ax = plt.subplots(figsize=(10, 8))

    # (label value, colour, opacity, legend entry); healthy points are drawn
    # first so the anomalies are rendered on top of them.
    layers = (
        (0, 'blue', 0.2, 'Healthy (3 Modes)'),
        (1, 'red', 0.6, 'Anomalies (The Trap)'),
    )
    for label_value, colour, opacity, legend_text in layers:
        mask = (labels == label_value).to_numpy()
        ax.scatter(projected[mask, 0], projected[mask, 1],
                   c=colour, alpha=opacity, s=10, label=legend_text)

    ax.set_title("V2 Benchmark Data: The 'Three Islands' Topology")
    ax.set_xlabel("Principal Component 1")
    ax.set_ylabel("Principal Component 2")
    ax.legend()
    ax.grid(True, alpha=0.3)

    fig.savefig('v2_data_topology.png')
    print("Plot saved to 'v2_data_topology.png'. Open it to see the 'Islands'.")