# QGAN_Project / vG0.2 /Ftrain_supervised_cnn.py
# 1bnjmn3's picture
# Add files using upload-large-folder tool
# 17313b4 verified
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import os
# --- CONFIGURATION ---
# Hyperparameters shared by the data pipeline and the training loop below.
BATCH_SIZE: int = 32  # mini-batch size handed to the training DataLoader
EPOCHS: int = 40  # number of passes over the training loader
LEARNING_RATE: float = 1e-4  # learning rate given to the Adam optimizer
SEQ_LEN: int = 100  # column count a channel prefix must have to be used
print("๐Ÿš€ INITIATING 'BOSS FIGHT': SUPERVISED 1D CNN...")

# 1. LOAD DATA
# Candidate dataset locations, tried in order; the first one that exists is used.
# NOTE(review): the first entry is spelled 'vG.0.1' -- confirm it matches the
# folder name on disk.
possible_paths = ['vG.0.1/real_tokamak_data_v2.csv', 'real_tokamak_data_v2.csv']
df = None
for path in possible_paths:
    if os.path.exists(path):
        print(f" โœ… Found data at: {path}")
        df = pd.read_csv(path)
        break

if df is None:
    # A bare exit() would stop silently with status 0, i.e. report success.
    # Raising SystemExit with a message prints the reason and exits non-zero.
    raise SystemExit(
        f"Dataset not found. Looked in: {', '.join(possible_paths)}"
    )

# Turn +/-inf into NaN, then zero-fill every NaN so later steps see finite values.
df = df.replace([np.inf, -np.inf], np.nan).fillna(0)
# 2. CHANNELS: USE EVERYTHING (Physics + IP)
# The CNN handles scale differences better than AE
prefixes = ['ip', 'n1', 'beta', 'li', 'q95']
found_channels = []
print(" Extracting All Channels...")
for p in prefixes:
    # Columns belonging to this channel are named '<prefix>_<index>'; order them
    # by the integer index so the last axis is in index order.
    cols = [c for c in df.columns if c.startswith(p + '_')]
    cols.sort(key=lambda x: int(x.split('_')[1]))
    if len(cols) == SEQ_LEN:
        found_channels.append(df[cols].values)
    else:
        # Make the drop visible instead of silently training on fewer channels.
        print(f" Skipping channel '{p}': expected {SEQ_LEN} columns, found {len(cols)}")

if not found_channels:
    # np.stack([]) would raise the same exception type with an opaque message.
    raise ValueError(
        f"No channel with exactly {SEQ_LEN} columns found for prefixes {prefixes}"
    )

# Stack the per-channel (N, SEQ_LEN) arrays along a new axis 1:
# result is (N, number of channels found, SEQ_LEN).
X_stacked = np.stack(found_channels, axis=1)
y = df['label'].values

# Split: 80/20, stratified on the label, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X_stacked, y, test_size=0.2, stratify=y, random_state=42
)
# 3. SCALING
# One MinMaxScaler per channel, fitted on the training split only (all samples,
# not just one class) and then applied unchanged to the test split.
# NOTE(review): MinMaxScaler rescales every column separately, so each time
# step of a channel gets its own min/max here -- confirm that is intended
# rather than one range per channel.
# NOTE(review): results are written back in place, which presumably relies on
# X_train / X_test being float arrays -- TODO confirm.
n_channels = X_train.shape[1]
for ch in range(n_channels):
    channel_scaler = MinMaxScaler()
    X_train[:, ch, :] = channel_scaler.fit_transform(X_train[:, ch, :])
    X_test[:, ch, :] = channel_scaler.transform(X_test[:, ch, :])

# Replace any NaN/inf left after scaling with finite numbers.
X_train, X_test = np.nan_to_num(X_train), np.nan_to_num(X_test)
# PyTorch (Includes Labels now!)
# Features become float32 tensors; y_test stays a NumPy array because it is
# only used for sklearn scoring.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Insert an axis at position 1 so the labels form a column, shape (N, 1).
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)[:, None]

# Shuffled mini-batches of (features, label) pairs for training.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
# 4. SUPERVISED CNN ARCHITECTURE
class TokamakCNN(nn.Module):
    """Binary classifier: three 1-D convolution blocks followed by a small MLP head.

    ``forward`` takes a float tensor shaped ``(batch, n_channels, length)`` and
    returns sigmoid outputs shaped ``(batch, 1)``, each in ``[0, 1]``.

    Args:
        n_channels: Number of input channels fed to the first convolution.
        dropout: Drop probability of the dropout layer in the classifier head.
            Defaults to 0.5, the value that used to be hard-coded.
    """

    def __init__(self, n_channels, dropout=0.5):
        super().__init__()

        # Feature Extractor (The Eye)
        # Layer order is unchanged, so state_dict keys ("features.0.weight", ...)
        # stay the same as before.
        self.features = nn.Sequential(
            # Block 1
            nn.Conv1d(n_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves the length, e.g. 100 -> 50
            # Block 2
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves the length again, e.g. 50 -> 25
            # Block 3 (Deep Features)
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # average over length -> (batch, 128, 1)
        )

        # Classifier (The Brain)
        self.classifier = nn.Sequential(
            nn.Flatten(),  # (batch, 128, 1) -> (batch, 128)
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout),  # prevent overfitting
            nn.Linear(64, 1),
            nn.Sigmoid(),  # squash to [0, 1]; callers pair this with BCELoss
        )

    def forward(self, x):
        """Run the feature extractor, then the classifier head, on ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x
# Pick the best available backend: CUDA first, then Apple MPS, else CPU.
# torch.backends.mps is looked up defensively because older torch builds do
# not ship that submodule.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = TokamakCNN(n_channels).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Binary Cross Entropy (Supervised); expects probabilities, which the model's
# final Sigmoid provides.
criterion = nn.BCELoss()
# 5. TRAINING
print(f"\n๐Ÿ”„ Training Supervised CNN (The Real Boss)...")
model.train()  # training mode: dropout active, BatchNorm uses batch statistics

for epoch_idx in range(EPOCHS):
    # Per-epoch accumulators.
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear grads, forward, loss, backward, update.
        optimizer.zero_grad()
        probs = model(batch_x)
        batch_loss = criterion(probs, batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # Accuracy tracking: threshold the outputs at 0.5 and compare to labels.
        hard_preds = (probs > 0.5).float()
        n_correct += hard_preds.eq(batch_y).sum().item()
        n_seen += batch_y.size(0)

    train_acc = n_correct / n_seen

    # Report every fifth epoch; the loss shown is the mean of the per-batch losses.
    epoch_no = epoch_idx + 1
    if epoch_no % 5 == 0:
        mean_loss = running_loss / len(train_loader)
        print(f" Epoch {epoch_no}/{EPOCHS} | Loss: {mean_loss:.4f} | Train Acc: {train_acc:.2%}")
# 6. EVALUATION
print("\nโš”๏ธ Evaluating...")
model.eval()  # inference mode: dropout off, BatchNorm uses running statistics

# Score the whole test split in one forward pass, without tracking gradients,
# and bring the outputs back to NumPy for sklearn.
with torch.no_grad():
    test_inputs = X_test_tensor.to(device)
    y_probs = model(test_inputs).cpu().numpy()

cnn_auc = roc_auc_score(y_test, y_probs)

# Summary banner. The Random Forest figure is a fixed reference string, not
# something computed in this script.
banner = "=" * 40
print("\n" + banner)
print(f"๐Ÿ”ฅ FINAL BOSS RESULTS")
print(banner)
print(f"๐ŸŒฒ Random Forest Baseline: ~0.8800")
print(f"๐Ÿง  Supervised CNN Score: {cnn_auc:.4f}")
print(banner)

# Verdict: an AUC above 0.90 counts as a strong baseline.
if cnn_auc > 0.90:
    verdict_lines = (
        "๐Ÿ† RESULT: We have a true SOTA baseline.",
        "๐Ÿ‘‰ NEXT STEP: This is the number Quantum must help.",
    )
else:
    verdict_lines = (
        "โš ๏ธ RESULT: Even Supervised CNN struggles. Data might be noisy.",
    )
for verdict_line in verdict_lines:
    print(verdict_line)

# Persist the trained weights (state_dict only) next to the working directory.
torch.save(model.state_dict(), 'cnn_supervised_boss.pth')