# Jog-sama's picture
# initial deployment
# af61511
"""
model.py – Define, train, and evaluate all three models:
1. Naive baseline (majority class classifier)
2. Classical ML (Random Forest on HOG features)
3. Deep learning (ScribblNet CNN)
Also runs the training size sensitivity experiment and saves results/plots.
Usage:
python scripts/model.py
"""
import json
import sys
import time
from pathlib import Path
from typing import Any
import joblib
import matplotlib
matplotlib.use("Agg") # headless backend
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
accuracy_score,
classification_report,
confusion_matrix,
)
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import seaborn as sns
sys.path.insert(0, str(Path(__file__).parent.parent))
from config import (
CLASSES,
MODELS_DIR,
OUTPUTS_DIR,
PROCESSED_DIR,
NUM_CLASSES,
RF_MAX_DEPTH,
RF_N_ESTIMATORS,
DEEP_BATCH_SIZE,
DEEP_EPOCHS,
DEEP_LR,
DEEP_WEIGHT_DECAY,
IMG_SIZE,
EXPERIMENT_FRACTIONS,
EXPERIMENT_EPOCHS,
)
# Utility
def get_device() -> torch.device:
    """Return the best available torch device (MPS > CUDA > CPU)."""
    # Probe accelerators in preference order; fall back to CPU.
    for backend, is_available in (
        ("mps", torch.backends.mps.is_available),
        ("cuda", torch.cuda.is_available),
    ):
        if is_available():
            return torch.device(backend)
    return torch.device("cpu")
def load_processed_data() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Load all processed arrays from disk.

    Returns:
        X_train_raw, X_test_raw, y_train, y_test, X_train_hog, X_test_hog
    """
    # Filenames mirror the returned tuple order exactly.
    names = (
        "X_train_raw",
        "X_test_raw",
        "y_train",
        "y_test",
        "X_train_hog",
        "X_test_hog",
    )
    return tuple(np.load(PROCESSED_DIR / f"{name}.npy") for name in names)
# 1. Naive Baseline
class MajorityClassifier:
    """Naive baseline: always predicts the most frequent class in training."""

    def __init__(self) -> None:
        # Most frequent training label; class 0 until fit() is called.
        self.majority_class: int = 0

    def fit(self, y: np.ndarray) -> "MajorityClassifier":
        """Fit by finding the majority class label.

        Args:
            y: 1-D array of integer class labels.

        Returns:
            self
        """
        # bincount + argmax picks the smallest label on ties, matching np.argmax.
        self.majority_class = int(np.bincount(y).argmax())
        return self

    def predict(self, n_samples: int) -> np.ndarray:
        """Return the majority class repeated n_samples times.

        Args:
            n_samples: Number of predictions to generate.

        Returns:
            Array of length n_samples, all equal to majority_class.
        """
        return np.repeat(np.int64(self.majority_class), n_samples)
def train_naive(y_train: np.ndarray, y_test: np.ndarray) -> dict[str, Any]:
    """Train and evaluate the majority class baseline.

    Args:
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        Dictionary of evaluation metrics.
    """
    print("\nNaive Baseline")
    baseline = MajorityClassifier().fit(y_train)
    predictions = baseline.predict(len(y_test))
    accuracy = accuracy_score(y_test, predictions)
    print(f" Majority class: {CLASSES[baseline.majority_class]}")
    print(f" Test accuracy: {accuracy:.4f}")
    # Persist the (tiny) model alongside its score for later inspection.
    joblib.dump(
        {"majority_class": baseline.majority_class, "accuracy": accuracy},
        MODELS_DIR / "naive_model.pkl",
    )
    return {"model": "naive", "accuracy": accuracy}
# 2. Classical ML
def train_classical(
    X_train_hog: np.ndarray,
    X_test_hog: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray,
) -> dict[str, Any]:
    """Train Random Forest on HOG features and evaluate.

    Args:
        X_train_hog: Training HOG feature matrix.
        X_test_hog: Test HOG feature matrix.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        Dictionary of evaluation metrics.
    """
    print("\nClassical ML (Random Forest on HOG)")
    # Standardise: fit on train, apply the same transform to test.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(X_train_hog)
    test_features = scaler.transform(X_test_hog)
    forest = RandomForestClassifier(
        n_estimators=RF_N_ESTIMATORS,
        max_depth=RF_MAX_DEPTH,
        n_jobs=-1,
        random_state=42,
    )
    start = time.time()
    forest.fit(train_features, y_train)
    elapsed = time.time() - start
    predictions = forest.predict(test_features)
    accuracy = accuracy_score(y_test, predictions)
    print(f" Training time: {elapsed:.1f}s")
    print(f" Test accuracy: {accuracy:.4f}")
    print(f"\n{classification_report(y_test, predictions, target_names=CLASSES)}")
    # Scaler must be bundled with the model: inference needs the same transform.
    joblib.dump({"clf": forest, "scaler": scaler}, MODELS_DIR / "classical_model.pkl")
    _save_confusion_matrix(y_test, predictions, "classical_confusion_matrix.png")
    return {"model": "classical", "accuracy": accuracy, "training_time_s": elapsed}
# 3. Deep Model
class ScribblNet(nn.Module):
    """Lightweight CNN for 28×28 grayscale sketch classification.

    Architecture:
        3 × (Conv2d → BatchNorm → ReLU → MaxPool)
        Dropout → FC(1152→256) → ReLU → Dropout → FC(256→num_classes)
    """

    def __init__(self, num_classes: int = NUM_CLASSES) -> None:
        super().__init__()

        def conv_stage(c_in: int, c_out: int) -> list[nn.Module]:
            # One stage: conv keeps spatial size, trailing pool halves it.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ]

        # Stages are splatted flat so submodule indices (and therefore
        # state_dict keys) match the original flat Sequential layout.
        self.features = nn.Sequential(
            *conv_stage(1, 32),
            *conv_stage(32, 64),
            *conv_stage(64, 128),
        )
        # Spatial size 28→14→7→3, so the flattened feature vector is 128×3×3 = 1152.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128 * 3 * 3, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Tensor of shape (B, 1, 28, 28), values in [0, 1].

        Returns:
            Logits tensor of shape (B, num_classes).
        """
        return self.classifier(torch.flatten(self.features(x), start_dim=1))
def make_dataloaders(
    X_raw: np.ndarray,
    y: np.ndarray,
    X_test_raw: np.ndarray,
    y_test: np.ndarray,
    batch_size: int = DEEP_BATCH_SIZE,
    train_fraction: float = 1.0,
) -> tuple[DataLoader, DataLoader]:
    """Build PyTorch DataLoaders from raw pixel arrays.

    Pixel values are normalised to [0, 1]. Training set can be subsampled
    via train_fraction for the sensitivity experiment.

    Args:
        X_raw: Training pixel array (N, 784), uint8.
        y: Training labels.
        X_test_raw: Test pixel array.
        y_test: Test labels.
        batch_size: Minibatch size.
        train_fraction: Fraction of training samples to use (0 < f ≤ 1).

    Returns:
        (train_loader, test_loader)
    """
    if train_fraction < 1.0:
        # Fixed seed keeps the subsample identical across runs/fractions.
        keep = max(1, int(len(X_raw) * train_fraction))
        chosen = np.random.default_rng(seed=7).permutation(len(X_raw))[:keep]
        X_raw, y = X_raw[chosen], y[chosen]

    def as_dataset(pixels: np.ndarray, labels: np.ndarray) -> TensorDataset:
        # uint8 → float32 in [0, 1], reshaped to (N, 1, H, W) for the CNN.
        images = torch.from_numpy(pixels.astype(np.float32) / 255.0)
        images = images.view(-1, 1, IMG_SIZE, IMG_SIZE)
        return TensorDataset(images, torch.from_numpy(labels))

    return (
        DataLoader(as_dataset(X_raw, y), batch_size=batch_size, shuffle=True, num_workers=0),
        DataLoader(as_dataset(X_test_raw, y_test), batch_size=batch_size, shuffle=False, num_workers=0),
    )
def train_one_epoch(
    model: nn.Module,
    loader: DataLoader,
    optimizer: torch.optim.Optimizer,
    criterion: nn.Module,
    device: torch.device,
) -> float:
    """Run one training epoch and return average loss.

    Args:
        model: ScribblNet instance.
        loader: Training DataLoader.
        optimizer: Optimiser (Adam).
        criterion: Loss function (CrossEntropyLoss).
        device: Torch device.

    Returns:
        Mean loss over all minibatches.
    """
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    # Average over minibatches (not samples), as len(loader) counts batches.
    return running_loss / len(loader)
def evaluate(
    model: nn.Module,
    loader: DataLoader,
    device: torch.device,
) -> tuple[float, np.ndarray]:
    """Evaluate model on a DataLoader.

    Args:
        model: ScribblNet instance.
        loader: Evaluation DataLoader.
        device: Torch device.

    Returns:
        (accuracy, predictions_array)
    """
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            preds = model(imgs).argmax(dim=1).cpu().numpy()
            all_preds.append(preds)
            all_labels.append(labels.numpy())
    preds = np.concatenate(all_preds)
    labels = np.concatenate(all_labels)
    # Exact-match mean is identical in value to sklearn's accuracy_score for
    # integer labels; computing it directly avoids the sklearn validation
    # round-trip and returns a plain built-in float (JSON-safe everywhere).
    return float((preds == labels).mean()), preds
def train_deep(
    X_train_raw: np.ndarray,
    X_test_raw: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray,
    epochs: int = DEEP_EPOCHS,
    train_fraction: float = 1.0,
    save_model: bool = True,
) -> dict[str, Any]:
    """Train ScribblNet and evaluate on test set.

    Tracks the best test accuracy across epochs. When save_model is True the
    best-performing weights are checkpointed, reloaded at the end, and used
    for the classification report / confusion matrix artefacts.

    Args:
        X_train_raw: Raw training pixel array.
        X_test_raw: Raw test pixel array.
        y_train: Training labels.
        y_test: Test labels.
        epochs: Number of training epochs.
        train_fraction: Fraction of training data to use.
        save_model: Whether to save weights to disk.

    Returns:
        Dictionary of evaluation metrics and training history.
    """
    print(f"\nDeep Model (ScribblNet, fraction={train_fraction:.0%})")
    device = get_device()
    print(f" Device: {device}")
    train_loader, test_loader = make_dataloaders(
        X_train_raw, y_train, X_test_raw, y_test, train_fraction=train_fraction
    )
    model = ScribblNet(num_classes=NUM_CLASSES).to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=DEEP_LR, weight_decay=DEEP_WEIGHT_DECAY
    )
    # Cosine schedule decays the LR smoothly towards zero over the run.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.CrossEntropyLoss()
    history: dict[str, list[float]] = {"loss": [], "val_acc": []}
    best_acc = 0.0
    for epoch in range(1, epochs + 1):
        loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
        acc, _ = evaluate(model, test_loader, device)
        scheduler.step()
        history["loss"].append(loss)
        history["val_acc"].append(acc)
        print(f" epoch {epoch:02d}/{epochs} loss={loss:.4f} val_acc={acc:.4f}")
        if acc > best_acc:
            best_acc = acc
            # Checkpoint immediately so the best epoch's weights survive.
            if save_model:
                torch.save(model.state_dict(), MODELS_DIR / "deep_model.pth")
    print(f"\n Best test accuracy: {best_acc:.4f}")
    if save_model:
        # Reload the best checkpoint for reporting. Fix: the original ran this
        # final evaluation even when save_model was False and then discarded
        # both results (unused final_acc), wasting a full test-set pass per
        # experiment fraction.
        model.load_state_dict(torch.load(MODELS_DIR / "deep_model.pth", map_location=device))
        _, final_preds = evaluate(model, test_loader, device)
        report = classification_report(y_test, final_preds, target_names=CLASSES)
        print(f"\n{report}")
        _save_confusion_matrix(y_test, final_preds, "deep_confusion_matrix.png")
        _save_training_curves(history)
    return {"model": "deep", "accuracy": best_acc, "history": history}
# Experiment: Training Size Sensitivity
def run_experiment(
    X_train_raw: np.ndarray,
    X_test_raw: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray,
    X_train_hog: np.ndarray,
    X_test_hog: np.ndarray,
) -> None:
    """Training set size sensitivity analysis.

    Sweeps over EXPERIMENT_FRACTIONS, training both the deep model and Random
    Forest at each fraction, then plots accuracy vs number of training samples.

    Motivation: Understanding how each model scales with data volume helps
    justify architectural choices and highlights when more data is beneficial.

    Args:
        X_train_raw: Raw training pixels.
        X_test_raw: Raw test pixels.
        y_train: Training labels.
        y_test: Test labels.
        X_train_hog: HOG training features.
        X_test_hog: HOG test features.
    """
    print("\nExperiment: Training Size Sensitivity")
    deep_accs, rf_accs, n_samples = [], [], []
    for frac in EXPERIMENT_FRACTIONS:
        n = int(len(X_train_raw) * frac)
        n_samples.append(n)
        print(f"\n Fraction={frac:.0%} (n={n})")
        # Deep model (shorter schedule, no artefacts saved during the sweep).
        result = train_deep(
            X_train_raw, X_test_raw, y_train, y_test,
            epochs=EXPERIMENT_EPOCHS, train_fraction=frac, save_model=False,
        )
        deep_accs.append(result["accuracy"])
        # Random Forest on a matching-size subsample (fixed seed → reproducible).
        idx = np.random.default_rng(seed=42).permutation(len(X_train_hog))[:n]
        # FIX: fit the scaler on the training subsample only and transform the
        # test set with that SAME scaler. The original fit one scaler on the
        # test features (leaking test statistics) and a different one on each
        # train subset, so train and test features were on inconsistent scales.
        scaler = StandardScaler()
        X_tr = scaler.fit_transform(X_train_hog[idx])
        X_te = scaler.transform(X_test_hog)
        rf = RandomForestClassifier(
            n_estimators=100, n_jobs=-1, random_state=42
        )
        rf.fit(X_tr, y_train[idx])
        rf_pred = rf.predict(X_te)
        rf_accs.append(accuracy_score(y_test, rf_pred))
        print(f" RF acc={rf_accs[-1]:.4f}")
    # Plot accuracy vs training-set size for both models.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(n_samples, deep_accs, marker="o", linestyle="solid", label="ScribblNet (CNN)", linewidth=2, markersize=7)
    ax.plot(n_samples, rf_accs, marker="s", linestyle="dashed", label="Random Forest (HOG)", linewidth=2, markersize=7)
    ax.set_xlabel("Training samples", fontsize=12)
    ax.set_ylabel("Test accuracy", fontsize=12)
    ax.set_title("Training Set Size Sensitivity", fontsize=14)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 1)
    plt.tight_layout()
    out_path = OUTPUTS_DIR / "experiment_sensitivity.png"
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
    print(f"\n Saved experiment plot → {out_path}")
    results = {
        "fractions": EXPERIMENT_FRACTIONS,
        "n_samples": n_samples,
        "deep_accs": deep_accs,
        "rf_accs": rf_accs,
    }
    with open(OUTPUTS_DIR / "experiment_results.json", "w") as f:
        json.dump(results, f, indent=2)
    print(" Saved experiment_results.json")
# Plotting Helpers
def _save_confusion_matrix(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    filename: str,
) -> None:
    """Save a normalised confusion matrix heatmap.

    Args:
        y_true: Ground truth labels.
        y_pred: Predicted labels.
        filename: Output filename (saved under OUTPUTS_DIR).
    """
    # normalize="true" makes each row sum to 1 (per-class recall on diagonal).
    cm = confusion_matrix(y_true, y_pred, normalize="true")
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt=".2f",
        xticklabels=CLASSES,
        yticklabels=CLASSES,
        cmap="Blues",
        ax=ax,
        linewidths=0.5,
    )
    ax.set_xlabel("Predicted", fontsize=11)
    ax.set_ylabel("True", fontsize=11)
    # Derive a human-readable title from the filename.
    ax.set_title(filename.replace("_", " ").replace(".png", "").title(), fontsize=13)
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    fig.savefig(OUTPUTS_DIR / filename, dpi=150)
    plt.close(fig)
    # Fix: report the actual artefact name (was a literal "(unknown)" placeholder).
    print(f" Saved {filename}")
def _save_training_curves(history: dict[str, list[float]]) -> None:
    """Save loss and validation accuracy curves for the deep model.

    Args:
        history: Dict with keys 'loss' and 'val_acc', each a list of per epoch values.
    """
    xs = range(1, len(history["loss"]) + 1)
    fig, axes = plt.subplots(1, 2, figsize=(11, 4))
    # (history key, line colour, y-label, panel title) per subplot.
    panels = (
        ("loss", "steelblue", "Training Loss", "ScribblNet Training Loss"),
        ("val_acc", "seagreen", "Validation Accuracy", "ScribblNet Validation Accuracy"),
    )
    for axis, (key, colour, ylabel, title) in zip(axes, panels):
        axis.plot(xs, history[key], color=colour, marker="o", linestyle="solid", markersize=5)
        axis.set_xlabel("Epoch")
        axis.set_ylabel(ylabel)
        axis.set_title(title)
        axis.grid(True, alpha=0.3)
    plt.tight_layout()
    fig.savefig(OUTPUTS_DIR / "deep_training_curves.png", dpi=150)
    plt.close(fig)
    print(" Saved deep_training_curves.png")
def _save_model_comparison(results: list[dict[str, Any]]) -> None:
    """Bar chart comparing test accuracy across all three models.

    Args:
        results: List of result dicts each containing 'model' and 'accuracy'.
    """
    labels = [entry["model"].capitalize() for entry in results]
    scores = [entry["accuracy"] for entry in results]
    fig, ax = plt.subplots(figsize=(7, 4))
    bars = ax.bar(labels, scores, color=["#94a3b8", "#60a5fa", "#34d399"], width=0.5)
    ax.set_ylim(0, 1)
    ax.set_ylabel("Test Accuracy")
    ax.set_title("Model Comparison")
    # Annotate each bar with its exact accuracy just above the bar top.
    for rect, score in zip(bars, scores):
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            rect.get_height() + 0.01,
            f"{score:.3f}",
            ha="center",
            fontsize=12,
        )
    ax.grid(True, axis="y", alpha=0.3)
    plt.tight_layout()
    fig.savefig(OUTPUTS_DIR / "model_comparison.png", dpi=150)
    plt.close(fig)
    print(" Saved model_comparison.png")
# Orchestrator
def train_all() -> None:
    """Train all three models, run the experiment, and save all artefacts."""
    (
        X_train_raw,
        X_test_raw,
        y_train,
        y_test,
        X_train_hog,
        X_test_hog,
    ) = load_processed_data()
    # Train in increasing order of sophistication: baseline → RF → CNN.
    results = [
        train_naive(y_train, y_test),
        train_classical(X_train_hog, X_test_hog, y_train, y_test),
        train_deep(X_train_raw, X_test_raw, y_train, y_test),
    ]
    _save_model_comparison(results)
    run_experiment(
        X_train_raw, X_test_raw, y_train, y_test, X_train_hog, X_test_hog
    )
    summary = {
        "naive_accuracy": results[0]["accuracy"],
        "classical_accuracy": results[1]["accuracy"],
        "deep_accuracy": results[2]["accuracy"],
    }
    with open(OUTPUTS_DIR / "results_summary.json", "w") as f:
        json.dump(summary, f, indent=2)
    print("\nTraining complete. Summary:")
    for k, v in summary.items():
        print(f" {k}: {v:.4f}")


if __name__ == "__main__":
    train_all()