| This document contains the instructions for running model 2. | |
| ### Training data mean and std | |
| lat_mean: 39.95156937654321 | |
| lat_std: 0.0005992518588323268 | |
| lon_mean: -75.19136795987654 | |
| lon_std: 0.0007030395253318959 | |
| ### Instructions to run and test the model | |
| Relevant imports | |
| ```python | |
| from transformers import PretrainedConfig | |
| import torch.nn as nn | |
| import torch | |
| import torchvision.models as models | |
| import torchvision.transforms as transforms | |
| from torch.utils.data import DataLoader, Dataset | |
| from transformers import AutoImageProcessor, AutoModelForImageClassification | |
| from huggingface_hub import PyTorchModelHubMixin | |
| from PIL import Image | |
| import os | |
| import numpy as np | |
| from huggingface_hub import hf_hub_download | |
# Mean and standard deviation of the training GPS labels, in degrees.
# They are required to map the model's normalized outputs back to
# latitude/longitude values.
lat_mean, lat_std = 39.95156937654321, 0.0005992518588323268
lon_mean, lon_std = -75.19136795987654, 0.0007030395253318959
| ``` | |
| Our model uses the `CustomResNetModel` class together with its `CustomResNetConfig` configuration class. To use the model, first run the class definitions below. | |
| ```python | |
| from transformers import PretrainedConfig | |
class CustomResNetConfig(PretrainedConfig):
    """Configuration object stored alongside the custom ResNet weights.

    Args:
        num_labels: Number of outputs produced by the model's final layer
            (defaults to 2).
        **kwargs: Forwarded unchanged to ``PretrainedConfig``.
    """

    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        # The base class consumes the generic options first; the head size
        # is recorded afterwards so the value given here is the one kept.
        super().__init__(**kwargs)
        self.num_labels = num_labels
class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    """Hugging Face ResNet backbone with a custom fully connected head.

    The backbone's original classifier is replaced by
    ``Flatten -> Linear(in_features, 128) -> BatchNorm1d -> ReLU ->
    Dropout(0.5) -> Linear(128, num_classes)``.

    Args:
        model_name: Hub id of the pre-trained backbone passed to
            ``AutoModelForImageClassification.from_pretrained``.
        num_classes: Number of outputs of the final linear layer.
        train_final_layer_only: When true, every parameter whose name does
            not contain ``"classifier"`` is frozen.
    """

    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18",
                 num_classes=2,
                 train_final_layer_only=False):
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # The stock classifier is a Sequential; index 1 is its Linear layer,
        # whose input width is reused for the new head.
        in_features = self.resnet.classifier[1].in_features

        # Replace the classifier with a head producing `num_classes` outputs
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(128, num_classes),
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Optionally freeze everything except the new head
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its output object.

        The output is whatever the wrapped Hugging Face model returns;
        callers read the prediction tensor from its ``.logits`` attribute.
        """
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format.

        Writes ``pytorch_model.bin`` (the state dict) and the configuration
        files into ``save_directory``, creating the directory if needed.
        Extra keyword arguments are accepted but ignored.
        """
        os.makedirs(save_directory, exist_ok=True)
        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))
        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from Hugging Face Hub or local directory.

        Args:
            repo_id: Either a path to an existing local directory produced by
                ``save_pretrained`` or a repository id on the Hugging Face Hub.
            model_name: Hub id of the backbone used to rebuild the
                architecture; it must be the backbone the weights were
                trained with, otherwise ``load_state_dict`` fails.
            **kwargs: Accepted but ignored.

        Returns:
            A ``CustomResNetModel`` with the stored weights loaded on CPU.
        """
        if os.path.isdir(repo_id):
            # Local checkpoint written by `save_pretrained`
            model_path = os.path.join(repo_id, "pytorch_model.bin")
            config_path = os.path.join(repo_id, "config.json")
        else:
            # Download pytorch_model.bin and config.json from Hugging Face Hub
            model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
            config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # The checkpoint is a plain state dict of tensors, so restrict
        # unpickling to tensor data instead of executing arbitrary pickled
        # objects from a downloaded file.
        state_dict = torch.load(
            model_path,
            map_location=torch.device("cpu"),
            weights_only=True,
        )
        model.load_state_dict(state_dict)
        return model
| ``` | |
| Then load the model weights from our repository on the Hugging Face Hub. | |
| ```python | |
# Repository on the Hugging Face Hub that holds the fine-tuned weights
REPO_MODEL_NAME = "final-project-5190/model-2"
# Backbone used to rebuild the architecture before the weights are loaded;
# it is passed explicitly because the class default is "microsoft/resnet-18".
BACKBONE_MODEL_NAME = "microsoft/resnet-50"

model = CustomResNetModel.from_pretrained(
    REPO_MODEL_NAME,
    model_name=BACKBONE_MODEL_NAME,
)
| ``` | |
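| Now use the model for inference. Here is an example we ran on the release dataset. The example assumes that `load_dataset` (from the Hugging Face `datasets` library), the `GPSImageDataset` class, and `inference_transform` are already defined in your session, in addition to the imports, constants, and `model` from the snippets above. | |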
| ```python | |
# Evaluate the model on the release dataset and report errors in degrees
# and in meters.
# Prerequisites not defined in this snippet: `load_dataset`,
# `GPSImageDataset`, `inference_transform`, plus `model` and the
# lat/lon mean and std constants from the earlier snippets.
from sklearn.metrics import mean_absolute_error, mean_squared_error


def _rmse(y_true, y_pred):
    """Return the root mean squared error, averaged over the output columns.

    Equivalent to ``mean_squared_error(..., squared=False)``, but does not
    rely on the ``squared`` argument, which newer scikit-learn releases
    no longer accept.
    """
    per_column_mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    return float(np.sqrt(per_column_mse).mean())


def _to_meters(coords, meters_per_degree_lat, meters_per_degree_lon):
    """Return a copy of ``coords`` (columns: lat, lon) scaled to meters."""
    scaled = coords.copy()
    scaled[:, 0] *= meters_per_degree_lat  # Latitude to meters
    scaled[:, 1] *= meters_per_degree_lon  # Longitude to meters
    return scaled


# Load test data
release_data = load_dataset("gydou/released_img", split="train")

# Create dataset and dataloader using training mean and std
rel_dataset = GPSImageDataset(
    hf_dataset=release_data,
    transform=inference_transform,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std,
)
rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)

# Ensure model is on the correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Denormalization constants are loop-invariant, so build them once
coord_std = torch.tensor([lat_std, lon_std])
coord_mean = torch.tensor([lat_mean, lon_mean])

# Initialize lists to store predictions and actual values
all_preds = []
all_actuals = []

model.eval()
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        # Forward pass
        outputs = model(images)

        # Extract logits (predictions)
        logits = outputs.logits  # Use .logits to get the tensor

        # Denormalize predictions and actual values
        preds = logits.cpu() * coord_std + coord_mean
        actuals = gps_coords.cpu() * coord_std + coord_mean

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute and print MAE and RMSE (in degrees)
mae = mean_absolute_error(all_actuals, all_preds)
rmse = _rmse(all_actuals, all_preds)
print(f'Release Dataset Mean Absolute Error: {mae}')
print(f'Release Dataset Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
meters_per_degree_latitude = 111000  # Constant
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean

all_preds_meters = _to_meters(
    all_preds, meters_per_degree_latitude, meters_per_degree_longitude
)
all_actuals_meters = _to_meters(
    all_actuals, meters_per_degree_latitude, meters_per_degree_longitude
)

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = _rmse(all_actuals_meters, all_preds_meters)
print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
| ``` | |
| After running the inference, the following results are printed: | |
| ``` | |
| Release Dataset Mean Absolute Error: 0.00046400768003540093 | |
| Release Dataset Root Mean Squared Error: 0.0005684648079729969 | |
| Mean Absolute Error (meters): 45.92 | |
| Root Mean Squared Error (meters): 56.18 | |
| ``` |