---
language:
- en
tags:
- xception
- gps-prediction
- mean
- standard deviation
metrics:
- mae
- rmse
---

# Custom Xception Model

This is an ensemble of fine-tuned Xception models for predicting latitude and longitude from images.

## Model Metadata

- **Latitude Mean**: 39.95165153939056
- **Latitude Std**: 0.0007248140892687559
- **Longitude Mean**: -75.19139496469714
- **Longitude Std**: 0.0007013685468922234

## Error Metrics

- **Mean Absolute Error (MAE)**: 0.00020775681686579616
- **Root Mean Squared Error (RMSE)**: 0.0003053099508331751

## Model Evaluation

```python
import torch
from transformers import AutoImageProcessor, AutoModelForImageClassification
from huggingface_hub import hf_hub_download, login
import torch.nn as nn
from datasets import load_dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
from geopy.distance import geodesic
from timm import create_model

# Fill in a Hugging Face access token before running.
login(token="")

# Single source of truth for the device used for loading and inference.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class CustomXceptionModel(nn.Module):
    """Xception backbone whose final layer is replaced by a linear head.

    This class must be defined before ``torch.load`` is called because the
    checkpoint contains pickled instances of it.

    Args:
        model_name: timm architecture name passed to ``create_model``.
        num_classes: Width of the output layer; 2 for (latitude, longitude).
        metadata: Optional dict stored on the instance; ``{}`` when omitted.
    """

    def __init__(self, model_name="legacy_xception", num_classes=2, metadata=None):
        super().__init__()
        self.metadata = metadata if metadata is not None else {}
        self.xception = create_model(model_name, pretrained=False)
        in_features = self.xception.fc.in_features
        # Honour num_classes instead of a hard-coded 2 (the default is still 2).
        self.xception.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.xception(x)


model_path = hf_hub_download(repo_id="aaaimg2gps/ensemble", filename="best_bagging_models.pth")
# The checkpoint is a list of whole pickled model objects (not state dicts), so
# weights_only must be False. Unpickling can execute arbitrary code: only load
# checkpoints from a source you trust.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model_list = torch.load(model_path, map_location=device, weights_only=False)
# eval() freezes BatchNorm statistics and disables Dropout for inference.
model_list = [model.to(device).eval() for model in model_list]

# Normalisation statistics (see "Model Metadata" above).
lat_mean = 39.95165153939056
lat_std = 0.0007248140892687559
lon_mean = -75.19139496469714
lon_std = 0.0007013685468922234

# test dataset
dataset_test = load_dataset("gydou/released_img", split="train")

inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


class GPSImageDataset(Dataset):
    """Wrap a Hugging Face dataset and yield (image, normalised GPS target) pairs.

    Each example is read through its ``'image'``, ``'Latitude'`` and
    ``'Longitude'`` keys. Coordinates are z-score normalised with the supplied
    means and standard deviations.
    """

    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None,
                 lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform
        self.latitude_mean = lat_mean
        self.latitude_std = lat_std
        self.longitude_mean = lon_mean
        self.longitude_std = lon_std

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        example = self.hf_dataset[idx]
        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']

        if self.transform:
            image = self.transform(image)

        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)

        return image, gps_coords


test_dataset = GPSImageDataset(
    hf_dataset=dataset_test,
    transform=inference_transform,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std,
)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


def weighted_mean(outputs_list):
    """Combine per-model predictions with agreement-based weights.

    For every sample, each model's prediction is weighted by the inverse of its
    mean Euclidean distance to all ensemble predictions for that sample (its own
    included, which contributes zero). Predictions close to the consensus
    therefore count more than outliers.

    Args:
        outputs_list: One tensor of shape (batch_size, 2) per model.

    Returns:
        Tensor of shape (batch_size, 2) holding the weighted predictions.
    """
    epsilon = 1e-6  # avoids division by zero when all models agree exactly
    # (num_models, batch_size, 2) -> (batch_size, num_models, 2)
    per_sample = torch.stack(outputs_list, dim=0).transpose(0, 1).contiguous()
    # Batched pairwise distances: (batch_size, num_models, num_models)
    pairwise = torch.cdist(per_sample, per_sample, p=2)
    avg_distances = pairwise.mean(dim=2)
    weights = 1 / (avg_distances + epsilon)
    normalized_weights = weights / weights.sum(dim=1, keepdim=True)
    return (per_sample * normalized_weights.unsqueeze(-1)).sum(dim=1)


def evaluate_model(model_list, dataloader, device):
    """Run the ensemble over ``dataloader`` and report geodesic errors in meters.

    Prints the predicted/actual coordinates and distance error for every sample,
    followed by summary statistics.

    Args:
        model_list: Models whose outputs are combined with ``weighted_mean``.
        dataloader: Yields (images, normalised gps_coords) batches.
        device: Device the images are moved to before inference.

    Returns:
        Dict with keys ``mean``, ``median``, ``std`` and ``p95`` (meters), or
        ``None`` when the dataloader produced no samples.
    """
    # De-normalise in float64: at these magnitudes float32 is spaced about
    # 4e-6 to 8e-6 degrees apart (roughly half a meter), which would add noise
    # to every reported distance.
    scale = torch.tensor([lat_std, lon_std], dtype=torch.float64)
    offset = torch.tensor([lat_mean, lon_mean], dtype=torch.float64)

    for model in model_list:
        model.eval()

    distances = []
    with torch.no_grad():
        for images, gps_coords in dataloader:
            images = images.to(device)

            # prediction of each model
            outputs_list = [model(images) for model in model_list]

            # weighted mean of predictions
            outputs_mean = weighted_mean(outputs_list)

            # denormalize predictions and actuals
            preds = outputs_mean.cpu().double() * scale + offset
            actuals = gps_coords.cpu().double() * scale + offset

            for pred, actual in zip(preds.numpy(), actuals.numpy()):
                distance = geodesic((actual[0], actual[1]), (pred[0], pred[1])).meters
                distances.append(distance)
                print(f"Predicted coordinates: ({pred[0]:.6f}, {pred[1]:.6f})")
                print(f"Actual coordinates: ({actual[0]:.6f}, {actual[1]:.6f})")
                print(f"Distance error: {distance:.2f} meters")
                print("---")

    if not distances:
        # Avoid NaN statistics and NumPy warnings on an empty dataset.
        print("No samples were evaluated.")
        return None

    mean_dist = np.mean(distances)
    median_dist = np.median(distances)
    dist_std = np.std(distances)
    dist_95 = np.percentile(distances, 95)

    print("\n=== Overall Performance Evaluation ===")
    print(f'Mean distance error: {mean_dist:.2f} meters')
    print(f'Median distance error: {median_dist:.2f} meters')
    print(f'Distance standard deviation: {dist_std:.2f} meters')
    print(f'95th percentile distance error: {dist_95:.2f} meters')

    return {
        "mean": float(mean_dist),
        "median": float(median_dist),
        "std": float(dist_std),
        "p95": float(dist_95),
    }


# evaluation
print("Starting model evaluation...")
evaluate_model(model_list, test_dataloader, device)
```