"""Noise/corruption utilities and a unified evaluation helper for MNIST-style
classifiers (PyTorch CNNs and sklearn SVD + LogisticRegression baselines)."""

import numpy as np
import torch
import torchvision.transforms as transforms
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def fit_svd_baseline(X_train, y_train, n_components=20):
    """Fit a linear baseline: mean-centering -> TruncatedSVD -> LogisticRegression.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Flattened training images.
    y_train : array-like of shape (n_samples,)
        Integer class labels.
    n_components : int
        Number of SVD components to keep.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline.
    """
    pipeline = Pipeline([
        # with_std=False: center only, so the SVD components stay in pixel units.
        ('scaler', StandardScaler(with_std=False)),
        ('svd', TruncatedSVD(n_components=n_components, random_state=42)),
        ('logistic', LogisticRegression(max_iter=1000)),
    ])
    pipeline.fit(X_train, y_train)
    return pipeline


def add_gaussian_noise(X, sigma):
    """Add i.i.d. Gaussian noise with std ``sigma`` and clip to [0, 1].

    Uniformly handles both torch Tensors and numpy arrays and returns the
    same type as the input. ``sigma <= 0`` is a no-op (X returned unchanged).
    """
    if sigma <= 0:
        return X
    if torch.is_tensor(X):
        noise = torch.randn_like(X) * sigma
        return torch.clamp(X + noise, 0, 1)
    noise = np.random.randn(*X.shape) * sigma
    return np.clip(X + noise, 0, 1)


def add_svd_aligned_noise(X, sigma, components):
    """Add Gaussian noise projected onto the subspace spanned by ``components``,
    so the perturbation lives entirely within the SVD 'signal' subspace.

    Parameters
    ----------
    X : torch.Tensor or np.ndarray
        Batch of images; flattened internally to (B, n_features).
    sigma : float
        Noise scale. ``sigma <= 0`` is a no-op.
    components : array or tensor of shape (k, n_features)
        Row basis V_k (e.g. ``TruncatedSVD.components_``).

    Returns
    -------
    Same type and shape as ``X``. For tensor input the result is returned on
    the input's device with the input's dtype (the previous implementation
    always produced a CPU float32 tensor, which broke GPU callers).
    """
    if sigma <= 0:
        return X

    is_tensor = torch.is_tensor(X)
    orig_shape = list(X.shape)
    # Work in numpy: flatten to (B, n_features); detach in case grad is tracked.
    if is_tensor:
        X_flat = X.detach().cpu().numpy().reshape(orig_shape[0], -1)
        if torch.is_tensor(components):
            components_np = components.detach().cpu().numpy()
        else:
            components_np = components
    else:
        X_flat = X.reshape(orig_shape[0], -1)
        components_np = components

    # 1. Generate random Gaussian noise in full dimensionality.
    noise = np.random.randn(*X_flat.shape) * sigma
    # 2. Project noise onto the components: P = V_k^T @ V_k.
    projected_noise = (noise @ components_np.T) @ components_np
    # 3. Add back and clip to the valid pixel range.
    X_noisy = np.clip(X_flat + projected_noise, 0, 1)

    if is_tensor:
        # Restore the caller's shape, device, and dtype.
        out = torch.from_numpy(X_noisy).reshape(orig_shape)
        return out.to(device=X.device, dtype=X.dtype)
    return X_noisy.reshape(orig_shape)


def add_blur(X, kernel_size):
    """Gaussian-blur a 4D batch of torch images (B, C, H, W).

    ``kernel_size <= 1`` is a no-op. torchvision's ``GaussianBlur`` rejects
    even kernel sizes, so even values are rounded up to the next odd size
    instead of raising. Sigma grows with the kernel radius.
    """
    if kernel_size <= 1:
        return X
    if kernel_size % 2 == 0:
        kernel_size += 1  # GaussianBlur requires a positive odd kernel size
    sigma = 0.1 + 0.3 * (kernel_size // 2)
    blur_fn = transforms.GaussianBlur(kernel_size=(kernel_size, kernel_size),
                                      sigma=(sigma, sigma))
    return blur_fn(X)


def evaluate_classifier(model, X, y, device="cpu", is_pytorch=True):
    """Evaluate a classifier and return its accuracy in [0, 1].

    Handles PyTorch models (CNN, Hybrid) and sklearn pipelines (SVD+LR).

    Parameters
    ----------
    model : torch.nn.Module or sklearn estimator
    X : torch.Tensor or np.ndarray
        Inputs; 2D inputs are reshaped to (B, 1, 28, 28) for the CNN path.
    y : torch.Tensor or array-like
        Ground-truth labels.
    device : str
        Device for the PyTorch forward pass.
    is_pytorch : bool
        Selects the PyTorch path (True) or the sklearn path (False).
    """
    # sklearn's accuracy_score needs CPU numpy labels; convert once up front
    # (the original created an unused y_t tensor and passed a possibly-GPU
    # tensor straight to accuracy_score).
    y_np = y.detach().cpu().numpy() if torch.is_tensor(y) else np.asarray(y)

    if is_pytorch:
        model.eval()
        model.to(device)
        # Ensure X is 4D for the CNN: (B, 1, 28, 28).
        if len(X.shape) == 2:
            X_t = torch.as_tensor(X.reshape(-1, 1, 28, 28),
                                  dtype=torch.float32).to(device)
        else:
            X_t = torch.as_tensor(X, dtype=torch.float32).to(device)
        with torch.no_grad():
            logits = model(X_t)
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        return accuracy_score(y_np, preds)

    # Sklearn pipeline: ensure X is flattened 2D numpy.
    if torch.is_tensor(X):
        X_np = X.detach().view(X.size(0), -1).cpu().numpy()
    else:
        X_np = X.reshape(X.shape[0], -1)
    preds = model.predict(X_np)
    return accuracy_score(y_np, preds)