| # Image to GPS Project - ConvNext, MobileNet and EfficientNet Ensemble | |
| ## Training Data Statistics | |
| ```python | |
| lat_mean = 39.951537011424264 | |
| lat_std = 0.0006940325318781937 | |
| lon_mean = -75.19152009539549 | |
| lon_std = 0.0007607716964655242 | |
| ``` | |
| ## How to Load the Model and Perform Inference | |
| ```bash | |
| # install dependencies | |
| pip install geopy datasets torch torchvision huggingface_hub | |
| ``` | |
| ```python | |
| # import packages | |
| import numpy as np | |
| from geopy.distance import geodesic | |
| import torch | |
| from torch.utils.data import DataLoader, Dataset | |
| from torchvision import transforms | |
| import torch.nn as nn | |
| from torchvision.models import mobilenet_v2, MobileNet_V2_Weights, convnext_tiny, ConvNeXt_Tiny_Weights, efficientnet_b0, EfficientNet_B0_Weights | |
| from datasets import load_dataset | |
| from huggingface_hub import hf_hub_download | |
# load the model
# Identify the checkpoint file inside the Hugging Face Hub repository.
repo_id = "cis519projectA/Ensemble_ConvNeXt_MobileNet_EfficientNet"
filename = "ensemble_triple.pth"
# The returned value is later handed to torch.load as the checkpoint path.
model_path = hf_hub_download(repo_id, filename)
| # define models | |
class CustomEfficientNetModel(nn.Module):
    """EfficientNet-B0 backbone with its classifier replaced by a small MLP head.

    Args:
        weights: Weights argument forwarded to ``efficientnet_b0``.
        num_classes: Number of outputs produced by the final linear layer.
    """

    def __init__(self, weights=EfficientNet_B0_Weights.DEFAULT, num_classes=2):
        super().__init__()
        backbone = efficientnet_b0(weights=weights)
        # Reuse the input width of the stock classifier's layer at index 1.
        head_width = backbone.classifier[1].in_features
        head_layers = [
            nn.Linear(head_width, 512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        ]
        backbone.classifier = nn.Sequential(*head_layers)
        # Freeze the first three entries of the feature extractor.
        backbone.features[:3].requires_grad_(False)
        self.efficientnet = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped EfficientNet and return its output."""
        return self.efficientnet(x)
class CustomConvNeXtModel(nn.Module):
    """ConvNeXt-Tiny backbone with its classifier replaced by a pooled MLP head.

    Args:
        weights: Weights argument forwarded to ``convnext_tiny``.
        num_classes: Number of outputs produced by the final linear layer.
    """

    def __init__(self, weights=ConvNeXt_Tiny_Weights.DEFAULT, num_classes=2):
        super().__init__()
        backbone = convnext_tiny(weights=weights)
        # Reuse the input width of the stock classifier's layer at index 2.
        head_width = backbone.classifier[2].in_features
        head_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(head_width, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        ]
        backbone.classifier = nn.Sequential(*head_layers)
        # Freeze the first four entries of the feature extractor.
        backbone.features[:4].requires_grad_(False)
        self.convnext = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped ConvNeXt and return its output."""
        return self.convnext(x)
class CustomMobileNetModel(nn.Module):
    """MobileNetV2 backbone with its classifier replaced by a three-layer MLP head.

    Args:
        weights: Weights argument forwarded to ``mobilenet_v2``.
        num_classes: Number of outputs produced by the final linear layer.
    """

    def __init__(self, weights=MobileNet_V2_Weights.DEFAULT, num_classes=2):
        super().__init__()
        backbone = mobilenet_v2(weights=weights)
        # Reuse the input width of the stock classifier's layer at index 1.
        head_width = backbone.classifier[1].in_features
        head_layers = [
            nn.Linear(head_width, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes),
        ]
        backbone.classifier = nn.Sequential(*head_layers)
        # Freeze the first five entries of the feature extractor.
        backbone.features[:5].requires_grad_(False)
        self.mobilenet = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped MobileNetV2 and return its output."""
        return self.mobilenet(x)
class EnsembleModel(nn.Module):
    """Fuse the outputs of three member models with a small MLP.

    Each member receives the same input; their outputs are concatenated along
    dimension 1 and passed through ``fc`` to produce the final prediction.

    Args:
        convnext_model: First member model.
        mobilenet_model: Second member model.
        efficientnet_model: Third member model.
        num_classes: Output width of every member and of the fused prediction.
    """

    def __init__(self, convnext_model, mobilenet_model, efficientnet_model, num_classes=2):
        super().__init__()
        self.convnext = convnext_model
        self.mobilenet = mobilenet_model
        self.efficientnet = efficientnet_model
        # These scalars do not influence the output (the fusion MLP below does
        # all the mixing). They stay registered so that state dicts saved from
        # the earlier definition keep loading with strict key matching.
        self.weight_convnext = nn.Parameter(torch.tensor(1.0))
        self.weight_mobilenet = nn.Parameter(torch.tensor(1.0))
        self.weight_efficientnet = nn.Parameter(torch.tensor(1.0))
        self.fc = nn.Sequential(
            nn.Linear(num_classes * 3, 512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the fused prediction for ``x``.

        The previous implementation also built a softmax-weighted sum of the
        member outputs and then discarded it; that dead computation is gone,
        the returned value is unchanged.
        """
        member_outputs = (
            self.convnext(x),
            self.mobilenet(x),
            self.efficientnet(x),
        )
        return self.fc(torch.cat(member_outputs, dim=1))
# Build the ensemble on the best available device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# weights=None: load_state_dict below (strict by default) replaces every
# parameter and buffer, so fetching pretrained backbones first is wasted work.
convnext_model = CustomConvNeXtModel(weights=None, num_classes=2)
mobilenet_model = CustomMobileNetModel(weights=None, num_classes=2)
efficientnet_model = CustomEfficientNetModel(weights=None, num_classes=2)
ensemble_model = EnsembleModel(convnext_model, mobilenet_model, efficientnet_model, num_classes=2)
# load the model weights
# NOTE(security): the checkpoint is downloaded from a remote repository and
# torch.load unpickles it. weights_only=True restricts unpickling to tensors
# and plain containers; drop it only for a checkpoint you fully trust.
state_dict = torch.load(model_path, map_location=device, weights_only=True)
ensemble_model.load_state_dict(state_dict)
ensemble_model.to(device)
ensemble_model.eval()
# load the dataset
dataset_test = load_dataset("gydou/released_img", split="train")

# define transformers
# Resize to 224x224, convert to a tensor, then normalize each channel.
inference_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Parameters for denormalization
lat_mean, lat_std = 39.951537011424264, 0.0006940325318781937
lon_mean, lon_std = -75.19152009539549, 0.0007607716964655242
class GPSImageDataset(Dataset):
    """Yield ``(image, gps_coords)`` pairs from an indexable dataset of records.

    Args:
        hf_dataset: Indexable collection whose items expose the keys
            ``'image'``, ``'Latitude'`` and ``'Longitude'``.
        transform: Optional callable applied to each image.
        lat_mean: Mean subtracted from the latitude (``None`` means 0).
        lat_std: Value the centered latitude is divided by (``None`` means 1).
        lon_mean: Mean subtracted from the longitude (``None`` means 0).
        lon_std: Value the centered longitude is divided by (``None`` means 1).
    """

    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform
        self.latitude_mean = lat_mean
        self.latitude_std = lat_std
        self.longitude_mean = lon_mean
        self.longitude_std = lon_std

    def __len__(self):
        return len(self.hf_dataset)

    @staticmethod
    def _standardize(value, mean, std):
        """Return ``(value - mean) / std``, treating a missing mean as 0 and std as 1."""
        # The constructor advertises the statistics as optional; without this
        # fallback the default ``None`` values raised TypeError on first access.
        centered = value if mean is None else value - mean
        return centered if std is None else centered / std

    def __getitem__(self, idx):
        """Return the (optionally transformed) image and a float32 ``[lat, lon]`` tensor."""
        example = self.hf_dataset[idx]
        image = example['image']
        if self.transform is not None:
            image = self.transform(image)
        latitude = self._standardize(example['Latitude'], self.latitude_mean, self.latitude_std)
        longitude = self._standardize(example['Longitude'], self.longitude_mean, self.longitude_std)
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)
        return image, gps_coords
# transform test data
# Wrap the raw split so each item is a transformed image plus standardized coordinates.
test_dataset = GPSImageDataset(
    dataset_test,
    transform=inference_transform,
    lat_mean=lat_mean, lat_std=lat_std,
    lon_mean=lon_mean, lon_std=lon_std,
)
# Fixed order (no shuffling), batches of 32, four worker processes.
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, num_workers=4, shuffle=False)
| # evaluate | |
def evaluate_model_single_batch(model, dataloader, lat_mean, lat_std, lon_mean, lon_std):
    """Return the mean and RMS geodesic error, in meters, on the first batch only.

    Args:
        model: Module mapping an image batch to standardized ``[lat, lon]`` rows.
        dataloader: Iterable yielding ``(images, gps_coords)`` batches; only the
            first batch is consumed.
        lat_mean: Latitude mean used to undo standardization.
        lat_std: Latitude scale used to undo standardization.
        lon_mean: Longitude mean used to undo standardization.
        lon_std: Longitude scale used to undo standardization.

    Returns:
        ``(mean_error, rmse_error)`` in meters. Both are NaN when the
        dataloader yields no batch.
    """
    # Run on whatever device the model already lives on instead of relying on
    # a module-level ``device`` global being defined by the caller's script.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Denormalization constants, built once rather than per expression.
    scale = np.array([lat_std, lon_std])
    offset = np.array([lat_mean, lon_mean])

    model.eval()
    first_batch = next(iter(dataloader), None)
    if first_batch is None:
        return float("nan"), float("nan")

    images, gps_coords = first_batch
    with torch.no_grad():
        outputs = model(images.to(model_device))

    # Targets never need the accelerator: they are only converted to NumPy.
    preds_denorm = outputs.cpu().numpy() * scale + offset
    actuals_denorm = gps_coords.cpu().numpy() * scale + offset

    all_distances = [
        geodesic((actual[0], actual[1]), (pred[0], pred[1])).meters
        for pred, actual in zip(preds_denorm, actuals_denorm)
    ]
    mean_error = np.mean(all_distances)
    rmse_error = np.sqrt(np.mean(np.square(all_distances)))
    return mean_error, rmse_error
# Evaluate using only one batch
mean_error, rmse_error = evaluate_model_single_batch(
    model=ensemble_model,
    dataloader=test_dataloader,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std,
)
# Report both metrics rounded to two decimals.
print(f"Mean Error (meters): {mean_error:.2f}, RMSE (meters): {rmse_error:.2f}")
| ``` |