File size: 5,212 Bytes
# NOTE(review): file-viewer residue removed from this line (marker `17313b4` plus a 1-168 line-number gutter).
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import os
# --- CONFIGURATION ---
# Hyperparameters for the supervised 1D-CNN run defined further down this script.
BATCH_SIZE: int = 32  # mini-batch size handed to the training DataLoader
EPOCHS: int = 40  # number of full passes over the training set
LEARNING_RATE: float = 1e-4  # step size handed to the Adam optimizer
SEQ_LEN: int = 100  # number of `<prefix>_<index>` columns a channel must have to be used
print("๐ INITIATING 'BOSS FIGHT': SUPERVISED 1D CNN...")
# 1. LOAD DATA
# Candidate CSV locations, tried in order; the first one that exists is loaded.
possible_paths = ['vG.0.1/real_tokamak_data_v2.csv', 'real_tokamak_data_v2.csv']
df = None
for path in possible_paths:
    if os.path.exists(path):
        # The literal was split across two lines in the source (unterminated
        # string); it is rejoined here into a single valid f-string.
        print(f" โ Found data at: {path}")
        df = pd.read_csv(path)
        break
if df is None:
    # `exit()` is a helper injected by the `site` module and, called bare,
    # terminates silently with status 0. Raise SystemExit with a message so
    # the failure is visible and the process exits with a non-zero status.
    raise SystemExit(f"No data file found. Looked in: {possible_paths}")
# Turn +/-inf into NaN, then replace every NaN with 0 so later numeric steps
# never see non-finite values.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.fillna(0, inplace=True)
# 2. CHANNELS: USE EVERYTHING (Physics + IP)
# The CNN handles scale differences better than AE
# Each channel is expected as SEQ_LEN columns named `<prefix>_<index>`.
prefixes = ['ip', 'n1', 'beta', 'li', 'q95']
found_channels = []
print(" Extracting All Channels...")
for p in prefixes:
    # Only keep columns whose token after the first underscore is an integer;
    # a stray column such as `ip_err_3` would otherwise crash the int() sort key.
    cols = [
        c for c in df.columns
        if c.startswith(p + '_') and c.split('_')[1].isdecimal()
    ]
    # Order by time-step index numerically (a plain string sort puts `_10` before `_2`).
    cols.sort(key=lambda x: int(x.split('_')[1]))
    if len(cols) == SEQ_LEN:
        found_channels.append(df[cols].values)
    else:
        # Report the skip instead of silently producing fewer channels.
        print(f" Skipping channel '{p}': expected {SEQ_LEN} columns, found {len(cols)}")
if not found_channels:
    # np.stack([]) fails with an unhelpful message; explain what is missing.
    raise ValueError(
        f"No channel among {prefixes} has exactly {SEQ_LEN} '<prefix>_<index>' columns"
    )
X_stacked = np.stack(found_channels, axis=1)  # (N, n_found_channels, SEQ_LEN)
y = df['label'].values
# Split
# Stratified 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_stacked, y, test_size=0.2, stratify=y, random_state=42
)
# 3. SCALING
# Fit scaler on Train (All data, not just healthy, to handle full range)
# The scaled values are written back in place. If the arrays are not floating
# point (e.g. an all-integer CSV), that assignment would silently truncate the
# [0, 1] results to 0/1, so promote to float first. Float inputs are untouched.
if not np.issubdtype(X_train.dtype, np.floating):
    X_train = X_train.astype(np.float64)
if not np.issubdtype(X_test.dtype, np.floating):
    X_test = X_test.astype(np.float64)
n_channels = X_train.shape[1]
for i in range(n_channels):
    # One scaler per channel, fitted on the training rows only and then reused
    # for the test rows so no test statistics leak into the transform.
    # NOTE(review): MinMaxScaler scales every column of the 2-D slice on its
    # own, i.e. each time step gets its own min/max - confirm this is intended
    # rather than one min/max per channel.
    scaler = MinMaxScaler()
    X_train[:, i, :] = scaler.fit_transform(X_train[:, i, :])
    X_test[:, i, :] = scaler.transform(X_test[:, i, :])
# Replace any NaN/inf that survived scaling with finite numbers.
X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)
# PyTorch (Includes Labels now!)
# Features become float32 tensors; training labels gain a trailing axis -> (N, 1)
# so they match the model's single-column output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)[:, None]
# y_test is left as a NumPy array; only the sklearn metric consumes it.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# 4. SUPERVISED CNN ARCHITECTURE
class TokamakCNN(nn.Module):
    """1D convolutional binary classifier over multi-channel sequences.

    The network takes a float tensor of shape ``(batch, n_channels, length)``
    and returns a ``(batch, 1)`` tensor of sigmoid probabilities, which is the
    form ``nn.BCELoss`` expects.

    Args:
        n_channels: Number of input channels (size of dimension 1 of the input).
        dropout: Drop probability used in the classifier head. Defaults to
            0.5, the value that was previously hard-coded.

    The layer order inside ``features`` and ``classifier`` is unchanged, so
    ``state_dict`` keys (``features.0.weight`` ... ``classifier.4.bias``) stay
    compatible with previously saved checkpoints.
    """

    def __init__(self, n_channels, dropout=0.5):
        super().__init__()
        # Feature Extractor (The Eye)
        # kernel_size=3 with padding=1 keeps the sequence length; only the
        # pooling layers shorten it.
        self.features = nn.Sequential(
            # Block 1
            nn.Conv1d(n_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves the length (100 -> 50)
            # Block 2
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves the length again (50 -> 25)
            # Block 3 (Deep Features)
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            # Average over whatever length remains -> (batch, 128, 1); this is
            # what lets the model accept sequence lengths other than 100
            # (at least 4 steps are needed to survive the two pooling layers).
            nn.AdaptiveAvgPool1d(1),
        )
        # Classifier (The Brain)
        self.classifier = nn.Sequential(
            nn.Flatten(),  # (batch, 128, 1) -> (batch, 128)
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),  # Prevent overfitting
            nn.Linear(64, 1),
            nn.Sigmoid(),  # squash the single logit into a probability
        )

    def forward(self, x):
        """Return ``(batch, 1)`` probabilities for ``x`` of shape ``(batch, n_channels, length)``."""
        x = self.features(x)
        x = self.classifier(x)
        return x
# Pick the best available accelerator: CUDA first, then Apple-Silicon MPS,
# otherwise fall back to the CPU.
# `torch.backends.mps` is absent on older PyTorch builds, so look it up with
# getattr instead of assuming the attribute exists.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model = TokamakCNN(n_channels).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# The model already ends in a Sigmoid, so plain BCELoss (not the with-logits
# variant) is the matching criterion.
criterion = nn.BCELoss()  # Binary Cross Entropy (Supervised)
# 5. TRAINING
# Standard supervised loop: one optimizer step per mini-batch, with the running
# loss and training accuracy reported every fifth epoch.
print(f"\n๐ Training Supervised CNN (The Real Boss)...")
model.train()
for epoch in range(EPOCHS):
    epoch_loss, n_correct, n_seen = 0, 0, 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        # Accuracy tracking: threshold the probabilities at 0.5 and count matches.
        hits = (outputs > 0.5).float().eq(labels)
        n_correct += hits.sum().item()
        n_seen += labels.size(0)

    train_acc = n_correct / n_seen
    epoch_number = epoch + 1
    if epoch_number % 5 == 0:
        print(f" Epoch {epoch_number}/{EPOCHS} | Loss: {epoch_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2%}")
# 6. EVALUATION
print("\nโ๏ธ Evaluating...")
model.eval()  # switch BatchNorm to running statistics and disable Dropout
with torch.no_grad():
    # The model emits (N, 1); flatten to (N,) so the scores line up with the
    # 1-D `y_test` label array handed to the metric.
    y_probs = model(X_test_tensor.to(device)).cpu().numpy().ravel()
cnn_auc = roc_auc_score(y_test, y_probs)
print("\n" + "="*40)
print(f"๐ฅ FINAL BOSS RESULTS")
print("="*40)
# NOTE(review): the baseline figure below is a hard-coded string, not a value
# computed in this script.
print(f"๐ฒ Random Forest Baseline: ~0.8800")
print(f"๐ง Supervised CNN Score: {cnn_auc:.4f}")
print("="*40)
if cnn_auc > 0.90:
    print("๐ RESULT: We have a true SOTA baseline.")
    print("๐ NEXT STEP: This is the number Quantum must help.")
else:
    print("โ ๏ธ RESULT: Even Supervised CNN struggles. Data might be noisy.")
# Save CPU copies of the weights so the checkpoint loads on machines that lack
# the accelerator used for training, without needing `map_location`.
cpu_state = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
torch.save(cpu_state, 'cnn_supervised_boss.pth')