phase1b-15epochs / script.py

Upload 2 files

087a54a verified about 2 months ago

8.12 kB

	"""
	Inference script for CVGGNet-16 Ultra-Optimized
	Compatible with the lightweight model architecture
	"""

	import os
	import torch
	import torch.nn as nn
	from torchvision import models, transforms
	from PIL import Image
	import pandas as pd
	import numpy as np
	import cv2
	from tqdm import tqdm


	# ==================== CBAM MODULES (must match training) ====================

	class ChannelAttention(nn.Module):
	    """Channel-attention gate of CBAM.

	    The input is squeezed to one value per channel twice (global average
	    pooling and global max pooling). Both descriptors go through the same
	    1x1-conv bottleneck, are summed, and are squashed with a sigmoid.

	    Args:
	        channels: Number of channels of the incoming feature map.
	        reduction: Divisor applied to ``channels`` to get the bottleneck
	            width. The width is clamped to at least 1 so that
	            ``channels < reduction`` does not produce an empty layer.

	    The attribute names (``fc``, ``avg_pool``, ``max_pool``, ``sigmoid``)
	    define the state-dict keys and must stay as they are for checkpoints
	    to load.
	    """

	    def __init__(self, channels, reduction=16):
	        super(ChannelAttention, self).__init__()
	        self.avg_pool = nn.AdaptiveAvgPool2d(1)
	        self.max_pool = nn.AdaptiveMaxPool2d(1)

	        # Integer division yields 0 for channels < reduction, which would
	        # build a zero-width bottleneck; keep at least one hidden channel.
	        hidden_channels = max(1, channels // reduction)

	        self.fc = nn.Sequential(
	            nn.Conv2d(channels, hidden_channels, 1, bias=False),
	            nn.ReLU(inplace=True),
	            nn.Conv2d(hidden_channels, channels, 1, bias=False),
	        )
	        self.sigmoid = nn.Sigmoid()

	    def forward(self, x):
	        """Return per-channel gates in (0, 1) with spatial size 1x1."""
	        avg_out = self.fc(self.avg_pool(x))
	        max_out = self.fc(self.max_pool(x))
	        return self.sigmoid(avg_out + max_out)


	class SpatialAttention(nn.Module):
	    """Spatial-attention gate of CBAM.

	    The channel axis is collapsed into two maps (mean and max over
	    ``dim=1``). The maps are concatenated and passed through a single
	    bias-free convolution followed by a sigmoid.

	    Args:
	        kernel_size: Side length of the convolution kernel. Padding is
	            ``kernel_size // 2``, so the spatial size is preserved only
	            for odd kernel sizes.

	    The attribute name ``conv`` defines the state-dict key and must stay
	    as it is for checkpoints to load.
	    """

	    def __init__(self, kernel_size=7):
	        super(SpatialAttention, self).__init__()
	        self.conv = nn.Conv2d(
	            2, 1, kernel_size, padding=kernel_size // 2, bias=False
	        )
	        self.sigmoid = nn.Sigmoid()

	    def forward(self, x):
	        """Return a single-channel gate map with values in (0, 1)."""
	        avg_out = torch.mean(x, dim=1, keepdim=True)
	        # torch.max over a dim returns (values, indices); only values are used.
	        max_out, _ = torch.max(x, dim=1, keepdim=True)
	        pooled = torch.cat([avg_out, max_out], dim=1)
	        return self.sigmoid(self.conv(pooled))


	class CBAM(nn.Module):
	    """Convolutional Block Attention Module: channel gate, then spatial gate.

	    Args:
	        channels: Number of channels of the incoming feature map.
	        reduction: Bottleneck divisor forwarded to ``ChannelAttention``.
	        kernel_size: Kernel size forwarded to ``SpatialAttention``.

	    The attribute names ``channel_attention`` and ``spatial_attention``
	    are part of the state-dict keys and must not be renamed.
	    """

	    def __init__(self, channels, reduction=16, kernel_size=7):
	        super(CBAM, self).__init__()
	        self.channel_attention = ChannelAttention(channels, reduction)
	        self.spatial_attention = SpatialAttention(kernel_size)

	    def forward(self, x):
	        """Return ``x`` re-weighted by both gates."""
	        # Order matters: the spatial gate is computed from the features
	        # that have already been re-weighted per channel.
	        x = x * self.channel_attention(x)
	        x = x * self.spatial_attention(x)
	        return x


	# ==================== MODEL ARCHITECTURE ====================

	class CVGGNet16UltraOptimized(nn.Module):
	    """Ultra-optimized CVGGNet-16 with lightweight classifier.

	    Pipeline: VGG-16 convolutional features -> CBAM attention (512
	    channels) -> adaptive average pool to 7x7 -> three-layer MLP head.

	    Args:
	        num_classes: Width of the final linear layer.
	        pretrained: When true, initialise the VGG-16 backbone with the
	            ImageNet weights shipped by torchvision. Leave false when a
	            checkpoint is loaded afterwards.

	    The attribute names (``features``, ``cbam``, ``avgpool``,
	    ``classifier``) and the layer order inside ``classifier`` define the
	    state-dict keys and must match the training script.
	    """

	    def __init__(self, num_classes=3, pretrained=False):
	        super(CVGGNet16UltraOptimized, self).__init__()

	        # Load VGG-16 backbone. torchvision >= 0.13 deprecates the
	        # ``pretrained`` keyword in favour of ``weights``; fall back to the
	        # legacy keyword on older releases that lack the weights enum.
	        weights_enum = getattr(models, "VGG16_Weights", None)
	        if weights_enum is not None:
	            vgg16 = models.vgg16(
	                weights=weights_enum.IMAGENET1K_V1 if pretrained else None
	            )
	        else:
	            vgg16 = models.vgg16(pretrained=pretrained)
	        # Only the convolutional part is kept; VGG's own classifier is dropped.
	        self.features = vgg16.features

	        # CBAM attention
	        self.cbam = CBAM(channels=512, reduction=16)

	        # Pooling to a fixed 7x7 grid so the first linear layer always
	        # receives 512 * 7 * 7 inputs.
	        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

	        # LIGHTWEIGHT Classifier (matches training architecture)
	        self.classifier = nn.Sequential(
	            nn.Linear(512 * 7 * 7, 512),
	            nn.ReLU(inplace=True),
	            nn.Dropout(0.6),
	            nn.Linear(512, 128),
	            nn.ReLU(inplace=True),
	            nn.Dropout(0.5),
	            nn.Linear(128, num_classes),
	        )

	    def forward(self, x):
	        """Return raw class logits, one row per input sample."""
	        x = self.features(x)
	        x = self.cbam(x)
	        x = self.avgpool(x)
	        x = torch.flatten(x, 1)
	        x = self.classifier(x)
	        return x


	# ==================== BILATERAL FILTER ====================

	def rapid_bilateral_filter(image, radius=3, sigma_color=30, sigma_space=80):
	    """Rapid Bilateral Filter preprocessing.

	    Args:
	        image: A PIL image (converted to a NumPy array first) or any
	            array that ``cv2.bilateralFilter`` accepts directly.
	        radius: Forwarded as the second positional argument of
	            ``cv2.bilateralFilter``, which OpenCV documents as ``d``, the
	            pixel-neighbourhood *diameter*. The name is kept for caller
	            compatibility and the value is passed through unchanged.
	        sigma_color: Filter sigma in colour space.
	        sigma_space: Filter sigma in coordinate space.

	    Returns:
	        The filtered image as returned by OpenCV (a NumPy array).
	    """
	    if isinstance(image, Image.Image):
	        image = np.array(image)

	    return cv2.bilateralFilter(image, radius, sigma_color, sigma_space)


	# ==================== INFERENCE FUNCTION ====================

	def run_inference(test_images_path, model, image_size, submission_csv_path,
	                  use_bilateral_filter=True, device='cpu'):
	    """
	    Run inference on test images and write the predictions to a CSV file.

	    Args:
	        test_images_path: Path to test images directory
	        model: Trained model
	        image_size: Input image size (single int for square images)
	        submission_csv_path: Path to save predictions CSV
	        use_bilateral_filter: Whether to apply bilateral filter preprocessing
	        device: Device to run inference on ('cpu' or 'cuda')

	    Returns:
	        A DataFrame with the columns ``file_name`` and ``category_id``,
	        one row per processed file, in sorted file-name order.
	    """

	    model.eval()
	    model = model.to(device)

	    # Get test images. Only regular files are kept: a sub-directory in the
	    # listing would make Image.open fail and abort the whole run.
	    test_images = sorted(
	        name for name in os.listdir(test_images_path)
	        if os.path.isfile(os.path.join(test_images_path, name))
	    )

	    # Preprocessing transform
	    test_transform = transforms.Compose([
	        transforms.Resize((image_size, image_size)),
	        transforms.ToTensor(),
	        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
	    ])

	    predictions = []
	    # Learned from the model output so the report below is not tied to a
	    # fixed class count; stays 0 when there is nothing to predict.
	    num_classes = 0

	    print(f"Running inference on {len(test_images)} images...")

	    with torch.no_grad():
	        for image_name in tqdm(test_images):
	            img_path = os.path.join(test_images_path, image_name)
	            # The context manager releases the file handle as soon as the
	            # pixel data has been copied by convert().
	            with Image.open(img_path) as raw_image:
	                image = raw_image.convert('RGB')

	            # Apply bilateral filter if enabled
	            if use_bilateral_filter:
	                image = Image.fromarray(rapid_bilateral_filter(image))

	            # Preprocess
	            img_tensor = test_transform(image).unsqueeze(0).to(device)

	            # Predict
	            output = model(img_tensor)
	            num_classes = output.shape[1]
	            pred = torch.argmax(output, dim=1).cpu().item()
	            predictions.append(pred)

	    # Create submission DataFrame
	    df_predictions = pd.DataFrame({
	        'file_name': test_images,
	        'category_id': predictions
	    })

	    # Save to CSV
	    df_predictions.to_csv(submission_csv_path, index=False)
	    print(f"\n✓ Predictions saved to: {submission_csv_path}")

	    # Display prediction distribution. With zero images num_classes is 0,
	    # so the loop is skipped and no division by an empty total happens.
	    print("\nPrediction Distribution:")
	    total = len(df_predictions)
	    for class_id in range(num_classes):
	        count = (df_predictions['category_id'] == class_id).sum()
	        percentage = 100 * count / total
	        print(f" Class {class_id}: {count} images ({percentage:.1f}%)")

	    return df_predictions


	# ==================== MAIN SCRIPT ====================

	if __name__ == "__main__":
	    # Script entry point: build the model, load the checkpoint that sits
	    # next to this file, run inference and write submission.csv next to it.

	    # Paths
	    current_directory = os.path.dirname(os.path.abspath(__file__))
	    TEST_IMAGE_PATH = "/tmp/data/test_images"  # Update for HuggingFace
	    MODEL_WEIGHTS_PATH = os.path.join(current_directory, "cvggnet_optimized_small.pth")
	    SUBMISSION_CSV_SAVE_PATH = os.path.join(current_directory, "submission.csv")

	    # Configuration
	    NUM_CLASSES = 3
	    IMAGE_SIZE = 224
	    USE_BILATERAL_FILTER = True
	    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	    print("="*60)
	    print("CVGGNet-16 Ultra-Optimized Inference")
	    print("="*60)
	    print(f"Device: {DEVICE}")
	    print(f"Model weights: {MODEL_WEIGHTS_PATH}")
	    print(f"Test images: {TEST_IMAGE_PATH}")
	    print(f"Output: {SUBMISSION_CSV_SAVE_PATH}")
	    print(f"Bilateral filter: {USE_BILATERAL_FILTER}")
	    print("="*60 + "\n")

	    # Load model
	    print("Loading model...")
	    model = CVGGNet16UltraOptimized(num_classes=NUM_CLASSES, pretrained=False)

	    # Load weights
	    # NOTE(security): torch.load unpickles the file; only load checkpoints
	    # from a trusted source (consider weights_only=True where supported).
	    checkpoint = torch.load(MODEL_WEIGHTS_PATH, map_location=torch.device(DEVICE))

	    # Handle different checkpoint formats
	    if 'model_state_dict' in checkpoint:
	        model.load_state_dict(checkpoint['model_state_dict'])
	        print(f"✓ Model loaded from epoch {checkpoint.get('epoch', 'unknown')}")
	        # The ':.2f' spec only works on numbers; a missing or non-numeric
	        # 'val_acc' used to raise here, so fall back to plain text.
	        val_acc = checkpoint.get('val_acc', 'unknown')
	        try:
	            val_acc_text = f"{val_acc:.2f}%"
	        except (TypeError, ValueError):
	            val_acc_text = str(val_acc)
	        print(f" Validation accuracy: {val_acc_text}")
	    else:
	        model.load_state_dict(checkpoint)
	        print("✓ Model weights loaded")

	    # Check model size (size of the checkpoint file on disk)
	    model_size_bytes = os.path.getsize(MODEL_WEIGHTS_PATH)
	    model_size_mb = model_size_bytes / (1024**2)
	    print(f" Model size: {model_size_mb:.1f} MB\n")

	    # Run inference
	    predictions_df = run_inference(
	        test_images_path=TEST_IMAGE_PATH,
	        model=model,
	        image_size=IMAGE_SIZE,
	        submission_csv_path=SUBMISSION_CSV_SAVE_PATH,
	        use_bilateral_filter=USE_BILATERAL_FILTER,
	        device=DEVICE
	    )

	    print("\n✓ Inference complete!")