File size: 3,578 Bytes
58839b6
 
 
 
 
 
 
 
 
 
 
 
d9b5881
58839b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9b5881
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58839b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import torch
import numpy as np
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

def fit_svd_baseline(X_train, y_train, n_components=20):
    """Train the linear reference model: centering -> TruncatedSVD -> logistic regression.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Flattened training inputs.
    y_train : array-like of shape (n_samples,)
        Integer class labels.
    n_components : int, default=20
        Number of SVD components to keep.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline.
    """
    steps = [
        # Center features only (with_std=False): SVD sees raw per-feature variance.
        ('scaler', StandardScaler(with_std=False)),
        ('svd', TruncatedSVD(n_components=n_components, random_state=42)),
        ('logistic', LogisticRegression(max_iter=1000)),
    ]
    baseline = Pipeline(steps)
    baseline.fit(X_train, y_train)
    return baseline

def add_gaussian_noise(X, sigma):
    """
    Add i.i.d. Gaussian noise (std = sigma) and clip the result to [0, 1].

    Handles both torch Tensors and numpy arrays; the return value has the
    same type and shape as the input. A non-positive sigma is a no-op and
    returns X itself.
    """
    if sigma <= 0:
        return X
    if torch.is_tensor(X):
        # Torch path: randn_like matches X's shape, dtype and device.
        perturbed = X + torch.randn_like(X) * sigma
        return torch.clamp(perturbed, 0, 1)
    # Numpy path: standard normal from the global RNG, scaled by sigma.
    perturbed = X + np.random.randn(*X.shape) * sigma
    return np.clip(perturbed, 0, 1)

def add_svd_aligned_noise(X, sigma, components):
    """
    Add Gaussian noise restricted to the span of the given SVD components.

    Noise is sampled in the full input dimensionality, projected onto the
    subspace spanned by the rows of ``components`` (P = V_k^T @ V_k), added
    to X, and the result is clipped to [0, 1].

    Parameters
    ----------
    X : torch.Tensor or np.ndarray
        Batch of inputs; the first dimension is the batch, remaining
        dimensions are flattened for the projection.
    sigma : float
        Noise standard deviation; non-positive values return X unchanged.
    components : torch.Tensor or np.ndarray of shape (k, d)
        Component rows, e.g. ``TruncatedSVD.components_`` (assumed
        orthonormal for P to be a true projection — confirm with caller).
    sigma and components are only consulted when sigma > 0.

    Returns
    -------
    Same type and shape as X. For tensor input the original device and
    dtype are preserved (previously the result was always CPU float32,
    silently moving GPU inputs and down/up-casting other dtypes).
    """
    if sigma <= 0:
        return X

    is_tensor = torch.is_tensor(X)
    orig_shape = list(X.shape)

    # Flatten to (batch, d) numpy for the projection arithmetic.
    # detach() so tensors that require grad can cross into numpy.
    if is_tensor:
        X_flat = X.detach().cpu().numpy().reshape(orig_shape[0], -1)
        components_np = components.cpu().numpy() if torch.is_tensor(components) else components
    else:
        X_flat = X.reshape(orig_shape[0], -1)
        components_np = components

    # 1. Generate random Gaussian noise in full dimensionality.
    noise = np.random.randn(*X_flat.shape) * sigma

    # 2. Project noise onto the component subspace: noise @ V_k^T @ V_k.
    projected_noise = (noise @ components_np.T) @ components_np

    # 3. Add back and clip into the valid pixel range.
    X_noisy = np.clip(X_flat + projected_noise, 0, 1)

    if is_tensor:
        # Restore the caller's device/dtype instead of forcing CPU float32.
        return torch.from_numpy(X_noisy).to(device=X.device, dtype=X.dtype).view(orig_shape)
    return X_noisy.reshape(orig_shape)

def add_blur(X, kernel_size):
    """
    Apply a Gaussian blur to a 4D batch of images (B, C, H, W).

    A kernel_size of 1 or less means "no blur" and returns X unchanged.
    The blur sigma grows with the kernel radius: 0.1 + 0.3 * (kernel_size // 2).
    """
    if kernel_size <= 1:
        return X
    radius = kernel_size // 2
    blur_sigma = 0.1 + 0.3 * radius
    blur = transforms.GaussianBlur(
        kernel_size=(kernel_size, kernel_size),
        sigma=(blur_sigma, blur_sigma),
    )
    return blur(X)

def evaluate_classifier(model, X, y, device="cpu", is_pytorch=True):
    """
    Unified accuracy evaluation for PyTorch models and sklearn pipelines.

    PyTorch path: moves the model to ``device``, reshapes flat 2D input to
    (B, 1, 28, 28) for the CNN (assumes 28x28 single-channel images — TODO
    confirm against callers), runs one forward pass under no_grad, and
    scores argmax predictions against ``y``. Sklearn path: flattens X to
    2D numpy and scores ``model.predict``.

    Parameters
    ----------
    model : torch.nn.Module or sklearn estimator/pipeline
    X : torch.Tensor or np.ndarray
        Inputs; 2D (B, features) or 4D (B, C, H, W).
    y : array-like of shape (B,)
        Integer ground-truth labels (kept on host; never moved to device).
    device : str, default "cpu"
        Torch device for the PyTorch path.
    is_pytorch : bool, default True
        Selects which evaluation path to take.

    Returns
    -------
    float
        Classification accuracy in [0, 1].
    """
    if not is_pytorch:
        # Sklearn pipeline — ensure X is flattened 2D numpy.
        if torch.is_tensor(X):
            features = X.view(X.size(0), -1).cpu().numpy()
        else:
            features = X.reshape(X.shape[0], -1)
        return accuracy_score(y, model.predict(features))

    model.eval()
    model.to(device)

    # Ensure X is 4D for the CNN: flat (B, 784) rows become (B, 1, 28, 28).
    if len(X.shape) == 2:
        inputs = torch.as_tensor(X.reshape(-1, 1, 28, 28), dtype=torch.float32).to(device)
    else:
        inputs = torch.as_tensor(X, dtype=torch.float32).to(device)

    # NOTE: the labels are compared on the host by accuracy_score, so no
    # device transfer of y is needed (the previous y_t tensor was unused).
    with torch.no_grad():
        logits = model(inputs)
        preds = torch.argmax(logits, dim=1).cpu().numpy()
    return accuracy_score(y, preds)