import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# --- CONFIGURATION ---
BATCH_SIZE = 32
EPOCHS = 40
LEARNING_RATE = 1e-4
SEQ_LEN = 100  # a channel is used only if it has exactly this many columns

print("🚀 INITIATING 'BOSS FIGHT': SUPERVISED 1D CNN...")

# 1. LOAD DATA
# The first candidate path that exists on disk wins.
possible_paths = ['vG.0.1/real_tokamak_data_v2.csv', 'real_tokamak_data_v2.csv']
df = None
for path in possible_paths:
    if os.path.exists(path):
        print(f" ✅ Found data at: {path}")
        df = pd.read_csv(path)
        break
if df is None:
    # Stop with an explanation and a non-zero exit status instead of
    # terminating silently as if the run had succeeded.
    raise SystemExit(
        "No data file found; looked for: " + ", ".join(possible_paths)
    )

# Turn +/-inf into NaN, then replace every NaN with 0.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.fillna(0, inplace=True)

# 2. CHANNELS: USE EVERYTHING (Physics + IP)
# The CNN handles scale differences better than AE
prefixes = ['ip', 'n1', 'beta', 'li', 'q95']
found_channels = []
print(" Extracting All Channels...")
for p in prefixes:
    # Columns are named "<prefix>_<index>"; order them by the numeric index.
    cols = [c for c in df.columns if c.startswith(p + '_')]
    cols.sort(key=lambda x: int(x.split('_')[1]))
    if len(cols) == SEQ_LEN:
        # Force a float array: the scaling step below writes values in
        # [0, 1] back in place, which an integer array would truncate.
        found_channels.append(df[cols].to_numpy(dtype=np.float64))
    else:
        print(f" Skipping channel '{p}': found {len(cols)} columns, expected {SEQ_LEN}")

if not found_channels:
    raise ValueError(
        f"No channel among {prefixes} has exactly {SEQ_LEN} columns; "
        "nothing to train on."
    )

X_stacked = np.stack(found_channels, axis=1)  # (N, n_found_channels, SEQ_LEN)
y = df['label'].values

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X_stacked, y, test_size=0.2, stratify=y, random_state=42
)

# 3. SCALING
# Fit scaler on Train (All data, not just healthy, to handle full range)
# One scaler per channel, fitted on the training split only and then applied
# to the test split. MinMaxScaler scales each column independently, so within
# a channel every time step gets its own min/max.
n_channels = X_train.shape[1]
for i in range(n_channels):
    scaler = MinMaxScaler()
    X_train[:, i, :] = scaler.fit_transform(X_train[:, i, :])
    X_test[:, i, :] = scaler.transform(X_test[:, i, :])

# Guard against any NaN/inf that is left after scaling.
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)

# PyTorch (Includes Labels now!)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # (N, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# y_test is kept as numpy for sklearn scoring

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)


# 4. SUPERVISED CNN ARCHITECTURE
class TokamakCNN(nn.Module):
    """1D CNN binary classifier over multi-channel sequences.

    Three Conv1d/BatchNorm/ReLU blocks feed a global average pool, followed
    by a small fully connected head that ends in a sigmoid.

    Args:
        n_channels: Number of input channels given to the first Conv1d.
    """

    def __init__(self, n_channels: int) -> None:
        super().__init__()

        # Feature Extractor (The Eye)
        self.features = nn.Sequential(
            # Block 1
            nn.Conv1d(n_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves the length: 100 -> 50 for SEQ_LEN = 100

            # Block 2
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),  # 50 -> 25

            # Block 3 (Deep Features)
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # Crunch to (Batch, 128, 1)
        )

        # Classifier (The Brain)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),  # Prevent overfitting
            nn.Linear(64, 1),
            nn.Sigmoid(),  # outputs are probabilities, as nn.BCELoss expects
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one sigmoid output per sample, shape (batch, 1).

        Args:
            x: Input of shape (batch, n_channels, length).
        """
        x = self.features(x)
        x = self.classifier(x)
        return x


# Pick the best available accelerator: CUDA, then Apple MPS, then CPU.
# `torch.backends.mps` is looked up with getattr because torch builds that
# lack the MPS backend would raise AttributeError on direct access.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = TokamakCNN(n_channels).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.BCELoss()  # Binary Cross Entropy (Supervised)
# 5. TRAINING
print("\n🔥 Training Supervised CNN (The Real Boss)...")
model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Accuracy tracking: threshold the sigmoid outputs at 0.5.
        # Measured while the model is in train mode (dropout active).
        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
    # Report every 5th epoch; the loss shown is the mean of per-batch losses.
    if (epoch + 1) % 5 == 0:
        print(f" Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader):.4f} | Train Acc: {acc:.2%}")

# 6. EVALUATION
print("\n⚔️ Evaluating...")
model.eval()
with torch.no_grad():
    # The whole test set goes through the model in a single forward pass.
    y_probs = model(X_test_tensor.to(device)).cpu().numpy()

# The model emits shape (N, 1); hand sklearn a flat score vector.
cnn_auc = roc_auc_score(y_test, y_probs.ravel())

print("\n" + "="*40)
print("🔥 FINAL BOSS RESULTS")
print("="*40)
# NOTE: the Random Forest figure is a hard-coded reference value; this
# script does not compute it.
print("🌲 Random Forest Baseline: ~0.8800")
print(f"🧠 Supervised CNN Score: {cnn_auc:.4f}")
print("="*40)

if cnn_auc > 0.90:
    print("🏆 RESULT: We have a true SOTA baseline.")
    print("👉 NEXT STEP: This is the number Quantum must help.")
else:
    print("⚠️ RESULT: Even Supervised CNN struggles. Data might be noisy.")

# Save CPU copies of the weights so the checkpoint loads on machines without
# the training device (mps/cuda) and without needing `map_location`. The live
# model is left on its current device.
cpu_state_dict = {
    name: tensor.detach().cpu() for name, tensor in model.state_dict().items()
}
torch.save(cpu_state_dict, 'cnn_supervised_boss.pth')