| import numpy as np |
| import pandas as pd |
| import torch |
| import torch.nn as nn |
| import torch.optim as optim |
| from torch.utils.data import DataLoader, TensorDataset |
| from sklearn.model_selection import train_test_split |
| from sklearn.preprocessing import MinMaxScaler |
| from sklearn.ensemble import RandomForestClassifier |
| from sklearn.metrics import roc_auc_score |
| import os |
|
|
| |
# Training hyperparameters and expected input geometry.
BATCH_SIZE = 32  # mini-batch size handed to the training DataLoader
EPOCHS = 40  # number of passes over the training loader
LEARNING_RATE = 0.0001  # learning rate passed to the Adam optimizer
SEQ_LEN = 100  # number of columns a channel prefix must have to be used
|
|
print("๐ INITIATING 'BOSS FIGHT': SUPERVISED 1D CNN...")

# Load the dataset from the first candidate location that exists on disk.
# NOTE(review): the leading glyphs in these console messages look like
# mis-decoded emoji; they are kept as found until the originals are confirmed.
possible_paths = ['vG.0.1/real_tokamak_data_v2.csv', 'real_tokamak_data_v2.csv']
path = next((p for p in possible_paths if os.path.exists(p)), None)
if path is None:
    # Stop with an explanation and a non-zero exit status instead of ending
    # silently, so a missing data file is not mistaken for a successful run.
    raise SystemExit(f"No data file found; looked in: {possible_paths}")

print(f" โ Found data at: {path}")
df = pd.read_csv(path)

# Turn +/-inf into NaN and then every NaN into 0, so the scaling and tensor
# conversion further down only ever see finite numbers.
df = df.replace([np.inf, -np.inf], np.nan).fillna(0)
|
|
| |
| |
# Each signal is stored as a run of columns named '<prefix>_<index>'.
prefixes = ['ip', 'n1', 'beta', 'li', 'q95']
found_channels = []

print(" Extracting All Channels...")
for p in prefixes:
    # Keep only '<prefix>_<integer>' columns. A column such as '<prefix>_mean'
    # would otherwise make the integer sort key below raise ValueError.
    cols = [
        c for c in df.columns
        if c.startswith(p + '_') and c[len(p) + 1:].isdecimal()
    ]
    # Order numerically (…_2 before …_10), not lexicographically.
    cols.sort(key=lambda name: int(name[len(p) + 1:]))
    if len(cols) == SEQ_LEN:
        found_channels.append(df[cols].values)
    else:
        # Say which channel was dropped instead of skipping it silently.
        print(f" Skipping channel '{p}': expected {SEQ_LEN} columns, found {len(cols)}")

if not found_channels:
    # np.stack([]) would raise a ValueError with an unhelpful message.
    raise ValueError(
        f"No channel with exactly {SEQ_LEN} columns was found for prefixes {prefixes}"
    )

# Result has shape (rows, channels, SEQ_LEN): one (rows, SEQ_LEN) slab per
# channel, stacked along a new axis 1.
X_stacked = np.stack(found_channels, axis=1)
y = df['label'].values
|
|
| |
# Stratified 80/20 split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_stacked, y, test_size=0.2, stratify=y, random_state=42
)

# The scaled values are fractional and are written back in place below. If the
# arrays came out of the CSV with a non-float dtype, that write would truncate
# them, so promote to float first.
if not np.issubdtype(X_train.dtype, np.floating):
    X_train = X_train.astype(np.float64)
if not np.issubdtype(X_test.dtype, np.floating):
    X_test = X_test.astype(np.float64)

# Scale each channel separately. The scaler is fitted on the training split
# only and then applied to both splits, so no test-set statistics are used.
# MinMaxScaler treats every column of the (samples, SEQ_LEN) slab as its own
# feature, i.e. each position along the last axis gets its own min/max.
n_channels = X_train.shape[1]
for i in range(n_channels):
    scaler = MinMaxScaler()
    scaler.fit(X_train[:, i, :])
    X_train[:, i, :] = scaler.transform(X_train[:, i, :])
    X_test[:, i, :] = scaler.transform(X_test[:, i, :])

# Replace any NaN/inf left after scaling with finite numbers.
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)
|
|
| |
# Convert the NumPy splits to float32 tensors. The labels get a trailing axis
# of size 1 so they line up with the model's single-column output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)[:, None]

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Shuffled mini-batches over the training pairs.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
|
|
| |
class TokamakCNN(nn.Module):
    """1D convolutional binary classifier over multi-channel sequences.

    Three Conv1d -> BatchNorm1d -> ReLU stages (32, 64 and 128 filters,
    kernel size 3, padding 1) extract features.  The first two stages are
    followed by max pooling with window 2; the last one is averaged down to
    a single value per filter.  A fully connected head with dropout then maps
    the 128 pooled features to one sigmoid output.

    Args:
        n_channels: Number of input channels expected by the first
            convolution (dimension 1 of the input tensor).
    """

    def __init__(self, n_channels):
        super().__init__()

        # Build the stages in order so the layer positions inside the
        # Sequential container (and therefore the state_dict keys) stay
        # 0..11 exactly as laid out by hand.
        widths = (n_channels, 32, 64, 128)
        last_stage = len(widths) - 2
        layers = []
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:])):
            layers.append(nn.Conv1d(c_in, c_out, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm1d(c_out))
            layers.append(nn.ReLU())
            if stage == last_stage:
                # Collapse whatever length remains to one value per filter.
                layers.append(nn.AdaptiveAvgPool1d(1))
            else:
                layers.append(nn.MaxPool1d(2))
        self.features = nn.Sequential(*layers)

        head = [
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the sigmoid output of the head applied to the conv features."""
        return self.classifier(self.features(x))
|
|
# Pick the compute device: CUDA when present, otherwise Apple's MPS backend,
# otherwise the CPU.  `torch.backends.mps` does not exist on older torch
# builds, so look it up defensively instead of assuming the attribute is there.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = TokamakCNN(n_channels).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# The model already ends in a sigmoid, so plain binary cross-entropy on
# probabilities is the matching loss.
criterion = nn.BCELoss()
|
|
| |
print("\n๐ Training Supervised CNN (The Real Boss)...")
model.train()
n_batches = len(train_loader)
for epoch_no in range(1, EPOCHS + 1):
    # Per-epoch running totals.
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard optimisation step: clear gradients, forward, loss,
        # backward, update.
        optimizer.zero_grad()
        probs = model(batch_x)
        batch_loss = criterion(probs, batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # Accuracy uses the outputs computed before this step's weight
        # update, thresholded at 0.5.
        hits = (probs > 0.5).float() == batch_y
        n_correct += hits.sum().item()
        n_seen += batch_y.size(0)

    train_acc = n_correct / n_seen
    # Report every fifth epoch; the loss shown is the mean of the batch losses.
    if epoch_no % 5 == 0:
        print(f" Epoch {epoch_no}/{EPOCHS} | Loss: {running_loss/n_batches:.4f} | Train Acc: {train_acc:.2%}")
|
|
| |
print("\nโ๏ธ Evaluating...")
# Switch dropout and batch-norm to inference behaviour, then score the whole
# test split in one forward pass without tracking gradients.
model.eval()
with torch.no_grad():
    test_batch = X_test_tensor.to(device)
    test_scores = model(test_batch)
    y_probs = test_scores.cpu().numpy()

# Area under the ROC curve of the predicted probabilities against the labels.
cnn_auc = roc_auc_score(y_true=y_test, y_score=y_probs)
|
|
# Summary banner.
# NOTE(review): the Random Forest figure below is a hard-coded string; nothing
# in this section computes it.
banner = "=" * 40
print("\n" + banner)
print("๐ฅ FINAL BOSS RESULTS")
print(banner)
print("๐ฒ Random Forest Baseline: ~0.8800")
print(f"๐ง Supervised CNN Score: {cnn_auc:.4f}")
print(banner)

# Verdict depends on whether the test AUC clears 0.90.
if cnn_auc > 0.90:
    verdict_lines = (
        "๐ RESULT: We have a true SOTA baseline.",
        "๐ NEXT STEP: This is the number Quantum must help.",
    )
else:
    verdict_lines = (
        "โ ๏ธ RESULT: Even Supervised CNN struggles. Data might be noisy.",
    )
for line in verdict_lines:
    print(line)

# Persist the trained weights (parameters and buffers only, not the class).
torch.save(model.state_dict(), 'cnn_supervised_boss.pth')