cis519projectA
/

Ensemble_ConvNeXt_MobileNet_EfficientNet_Weight_Adjustment

Model card Files Files and versions

xet

Community

jiayicccc committed on Dec 12, 2024

Commit

95e0033

verified ·

1 Parent(s): 7253a0e

Create README.md

Browse files

Files changed (1) hide show

README.md +188 -0

README.md ADDED Viewed

	@@ -0,0 +1,188 @@

+# Image to GPS Project - ConvNeXt, MobileNet and EfficientNet Ensemble
+## Training Data Statistics
+```python
+lat_mean = 39.951537011424264
+lat_std = 0.0006940325318781937
+lon_mean = -75.19152009539549
+lon_std = 0.0007607716964655242
+```
+## How to Load the Model and Perform Inference
+```bash
+# install dependencies
+pip install geopy datasets torch torchvision huggingface_hub
+```
+```python
+# import packages
+import numpy as np
+from geopy.distance import geodesic
+import torch
+from torch.utils.data import DataLoader, Dataset
+from torchvision import transforms
+import torch.nn as nn
+from torchvision.models import mobilenet_v2, MobileNet_V2_Weights, convnext_tiny, ConvNeXt_Tiny_Weights, efficientnet_b0, EfficientNet_B0_Weights
+from datasets import load_dataset
+from huggingface_hub import hf_hub_download
+# load the model
+repo_id = "cis519projectA/Ensemble_ConvNeXt_MobileNet_EfficientNet_Weight_Adjustment"
+filename = "custom_ensemble_weight_adjust.pth"
+model_path = hf_hub_download(repo_id=repo_id, filename=filename)
+# define models
+class CustomEfficientNetModel(nn.Module):
+    def __init__(self, weights=EfficientNet_B0_Weights.DEFAULT, num_classes=2):
+        super().__init__()
+        self.efficientnet = efficientnet_b0(weights=weights)
+        in_features = self.efficientnet.classifier[1].in_features
+        self.efficientnet.classifier = nn.Sequential(
+            nn.Linear(in_features, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, num_classes)
+        )
+        for param in self.efficientnet.features[:3].parameters():
+            param.requires_grad = False
+    def forward(self, x):
+        return self.efficientnet(x)
+class CustomConvNeXtModel(nn.Module):
+    def __init__(self, weights=ConvNeXt_Tiny_Weights.DEFAULT, num_classes=2):
+        super().__init__()
+        self.convnext = convnext_tiny(weights=weights)
+        in_features = self.convnext.classifier[2].in_features
+        self.convnext.classifier = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Flatten(),
+            nn.Linear(in_features, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, num_classes)
+        )
+        for param in self.convnext.features[:4].parameters():
+            param.requires_grad = False
+    def forward(self, x):
+        return self.convnext(x)
+class CustomMobileNetModel(nn.Module):
+    def __init__(self, weights=MobileNet_V2_Weights.DEFAULT, num_classes=2):
+        super().__init__()
+        self.mobilenet = mobilenet_v2(weights=weights)
+        in_features = self.mobilenet.classifier[1].in_features
+        self.mobilenet.classifier = nn.Sequential(
+            nn.Linear(in_features, 1024),
+            nn.ReLU(),
+            nn.Dropout(p=0.5),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.5),
+            nn.Linear(512, num_classes)
+        )
+        for param in self.mobilenet.features[:5].parameters():
+            param.requires_grad = False
+    def forward(self, x):
+        return self.mobilenet(x)
+class EnsembleModel(nn.Module):
+    def __init__(self, convnext_model, mobilenet_model, efficientnet_model, num_classes=2):
+        super().__init__()
+        self.convnext = convnext_model
+        self.mobilenet = mobilenet_model
+        self.efficientnet = efficientnet_model
+        self.weight_convnext = nn.Parameter(torch.tensor(1.0))
+        self.weight_mobilenet = nn.Parameter(torch.tensor(1.0))
+        self.weight_efficientnet = nn.Parameter(torch.tensor(1.0))
+        self.fc = nn.Sequential(
+            nn.Linear(num_classes * 3, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, num_classes)
+        )
+    def forward(self, x):
+        convnext_out = self.convnext(x)
+        mobilenet_out = self.mobilenet(x)
+        efficientnet_out = self.efficientnet(x)
+        weights = torch.softmax(torch.stack([self.weight_convnext, self.weight_mobilenet, self.weight_efficientnet]), dim=0)
+        combined = (weights[0] * convnext_out +
+                    weights[1] * mobilenet_out +
+                    weights[2] * efficientnet_out)
+        return combined
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+convnext_model = CustomConvNeXtModel(weights=ConvNeXt_Tiny_Weights.DEFAULT, num_classes=2)
+mobilenet_model = CustomMobileNetModel(weights=MobileNet_V2_Weights.DEFAULT, num_classes=2)
+efficientnet_model = CustomEfficientNetModel(weights=EfficientNet_B0_Weights.DEFAULT, num_classes=2)
+ensemble_model = EnsembleModel(convnext_model, mobilenet_model, efficientnet_model, num_classes=2).to(device)
+# load the model weights
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+state_dict = torch.load(model_path, map_location=device)
+ensemble_model.load_state_dict(state_dict)
+ensemble_model.to(device)
+ensemble_model.eval()
+# load the dataset
+dataset_test = load_dataset("gydou/released_img", split="train")
+# define transformers
+inference_transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
+# Parameters for denormalization
+lat_mean = 39.951537011424264
+lat_std = 0.0006940325318781937
+lon_mean = -75.19152009539549
+lon_std = 0.0007607716964655242
+class GPSImageDataset(Dataset):
+    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
+        self.hf_dataset = hf_dataset
+        self.transform = transform
+        self.latitude_mean = lat_mean
+        self.latitude_std = lat_std
+        self.longitude_mean = lon_mean
+        self.longitude_std = lon_std
+    def __len__(self):
+        return len(self.hf_dataset)
+    def __getitem__(self, idx):
+        example = self.hf_dataset[idx]
+        image = example['image']
+        latitude = example['Latitude']
+        longitude = example['Longitude']
+        if self.transform:
+            image = self.transform(image)
+        latitude = (latitude - self.latitude_mean) / self.latitude_std
+        longitude = (longitude - self.longitude_mean) / self.longitude_std
+        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)
+        return image, gps_coords
+# transform test data
+test_dataset = GPSImageDataset(
+    hf_dataset=dataset_test,
+    transform=inference_transform,
+    lat_mean=lat_mean,
+    lat_std=lat_std,
+    lon_mean=lon_mean,
+    lon_std=lon_std
+)
+test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
+# evaluate
+def evaluate_model_single_batch(model, dataloader, lat_mean, lat_std, lon_mean, lon_std):
+    all_distances = []
+    model.eval()
+    with torch.no_grad():
+        for batch_idx, (images, gps_coords) in enumerate(dataloader):
+            images, gps_coords = images.to(device), gps_coords.to(device)
+            outputs = model(images)
+            preds_denorm = outputs.cpu().numpy() * np.array([lat_std, lon_std]) + np.array([lat_mean, lon_mean])
+            actuals_denorm = gps_coords.cpu().numpy() * np.array([lat_std, lon_std]) + np.array([lat_mean, lon_mean])
+            for pred, actual in zip(preds_denorm, actuals_denorm):
+                distance = geodesic((actual[0], actual[1]), (pred[0], pred[1])).meters
+                all_distances.append(distance)
+            break
+    mean_error = np.mean(all_distances)
+    rmse_error = np.sqrt(np.mean(np.square(all_distances)))
+    return mean_error, rmse_error
+# Evaluate using only one batch
+mean_error, rmse_error = evaluate_model_single_batch(
+    ensemble_model, test_dataloader, lat_mean, lat_std, lon_mean, lon_std
+)
+print(f"Mean Error (meters): {mean_error:.2f}, RMSE (meters): {rmse_error:.2f}")
+```