# Coconut-MNIST / src / exp_utils.py
# Hugging Face file-page residue preserved as comments:
#   author: ymlin105 — "feat: Refactor experiments and update report" (commit d9b5881)
import torch
import numpy as np
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
def fit_svd_baseline(X_train, y_train, n_components=20):
    """Train a linear baseline: mean-centering -> TruncatedSVD -> LogisticRegression.

    Args:
        X_train: 2D feature matrix, shape (n_samples, n_features).
        y_train: class labels, length n_samples.
        n_components: number of SVD components to retain.

    Returns:
        The fitted sklearn Pipeline.
    """
    steps = [
        # Center features only; with_std=False keeps the original variance.
        ('scaler', StandardScaler(with_std=False)),
        ('svd', TruncatedSVD(n_components=n_components, random_state=42)),
        ('logistic', LogisticRegression(max_iter=1000)),
    ]
    model = Pipeline(steps)
    model.fit(X_train, y_train)
    return model
def add_gaussian_noise(X, sigma):
    """Add i.i.d. Gaussian noise with std `sigma` and clip the result to [0, 1].

    Accepts either a torch Tensor or a numpy array and returns the same type
    as the input. A non-positive sigma is a no-op: X is returned unchanged.
    """
    if sigma <= 0:
        return X
    if not torch.is_tensor(X):
        perturbed = X + np.random.randn(*X.shape) * sigma
        return np.clip(perturbed, 0, 1)
    perturbed = X + torch.randn_like(X) * sigma
    return torch.clamp(perturbed, 0, 1)
def add_svd_aligned_noise(X, sigma, components):
    """Add Gaussian noise constrained to the subspace spanned by SVD components.

    Noise is drawn in the full input dimensionality, then projected onto the
    row space of `components` (P = V_k^T @ V_k), so the perturbation lives
    entirely within the 'signal' subspace. The result is clipped to [0, 1].

    Args:
        X: torch Tensor or numpy array; first dimension is the batch.
        sigma: noise standard deviation. sigma <= 0 returns X unchanged.
        components: (k, n_features) component matrix (tensor or array),
            e.g. ``TruncatedSVD.components_``.

    Returns:
        Noisy data of the same type and shape as X. For tensor input the
        original dtype and device are preserved (previously the result was
        always a CPU float32 tensor, silently moving GPU inputs to CPU).
    """
    if sigma <= 0:
        return X
    is_tensor = torch.is_tensor(X)
    orig_shape = list(X.shape)
    if is_tensor:
        # Bug fix: detach() first — .numpy() raises on tensors that require grad.
        X_flat = X.detach().cpu().numpy().reshape(orig_shape[0], -1)
        components_np = components.detach().cpu().numpy() if torch.is_tensor(components) else components
    else:
        X_flat = X.reshape(orig_shape[0], -1)
        components_np = components
    # 1. Generate random Gaussian noise in the full dimensionality.
    noise = np.random.randn(*X_flat.shape) * sigma
    # 2. Project onto the component subspace: noise @ V_k^T @ V_k.
    projected_noise = (noise @ components_np.T) @ components_np
    # 3. Add back and clip into the valid pixel range.
    X_noisy = np.clip(X_flat + projected_noise, 0, 1)
    if is_tensor:
        # Bug fix: restore the input's dtype and device instead of always
        # returning a CPU float32 tensor.
        return torch.from_numpy(X_noisy).to(dtype=X.dtype, device=X.device).view(orig_shape)
    return X_noisy.reshape(orig_shape)
def add_blur(X, kernel_size):
    """Apply a Gaussian blur to a batch of images (4D tensor: B, C, H, W).

    kernel_size <= 1 is a no-op. The blur sigma grows with the kernel via
    the heuristic sigma = 0.1 + 0.3 * (kernel_size // 2).
    """
    if kernel_size <= 1:
        return X
    blur_sigma = 0.1 + 0.3 * (kernel_size // 2)
    blur = transforms.GaussianBlur(
        kernel_size=(kernel_size, kernel_size),
        sigma=(blur_sigma, blur_sigma),
    )
    return blur(X)
def evaluate_classifier(model, X, y, device="cpu", is_pytorch=True):
    """
    Unified evaluation function returning classification accuracy.

    Handles PyTorch models (CNN, Hybrid) and sklearn pipelines (SVD+LR).

    Args:
        model: a torch nn.Module (is_pytorch=True) or a fitted sklearn
            estimator/pipeline (is_pytorch=False).
        X: inputs — 2D flattened or 4D (B, 1, 28, 28) for the PyTorch path;
            tensor or array for the sklearn path.
        y: ground-truth labels (array-like of ints).
        device: torch device string used for PyTorch evaluation.
        is_pytorch: selects the PyTorch vs sklearn code path.

    Returns:
        float accuracy in [0, 1] from sklearn.metrics.accuracy_score.
    """
    if is_pytorch:
        model.eval()
        model.to(device)
        # Ensure X is 4D for the CNN: (B, 1, 28, 28).
        if len(X.shape) == 2:
            X_t = torch.as_tensor(X.reshape(-1, 1, 28, 28), dtype=torch.float32).to(device)
        else:
            X_t = torch.as_tensor(X, dtype=torch.float32).to(device)
        # Bug fix: removed the unused y_t tensor (a dead device transfer);
        # accuracy_score consumes the original y directly.
        with torch.no_grad():
            logits = model(X_t)
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        return accuracy_score(y, preds)
    else:
        # Sklearn pipeline - ensure X is a flattened 2D numpy array.
        if torch.is_tensor(X):
            X_np = X.view(X.size(0), -1).cpu().numpy()
        else:
            X_np = X.reshape(X.shape[0], -1)
        preds = model.predict(X_np)
        return accuracy_score(y, preds)