| latitude_mean: 39.95184413388056 | |
| latitude_std: 0.0006308700565432299 | |
| longitude_mean: -75.19147985909444 | |
| longitude_std: 0.0006379960634765379 | |
| To run inference, pass a batch of image tensors to `predict_from_model(input_tensor)`: | |
| ``` | |
| import torch | |
| import torch.nn as nn | |
| import torchvision.transforms as transforms | |
| from torch.utils.data import DataLoader, Dataset | |
| from transformers import AutoImageProcessor, AutoModelForImageClassification | |
| from huggingface_hub import PyTorchModelHubMixin | |
| from PIL import Image | |
| import os | |
| import numpy as np | |
def predict_from_model(input_tensor, batch_size=32):
    """Predict normalized GPS coordinates for a batch of images.

    Inference runs in two stages:

    1. A weighted ensemble of ResNet-18 classifiers assigns every image to
       one of seven regions (arg-max of the weighted logit sum).
    2. For each region that received at least one image, that region's
       ResNet-18 regressor produces a 2-value output for those images.

    Args:
        input_tensor: Indexable, sized collection of image tensors (a
            stacked tensor or a list of tensors). Each item is fed to the
            networks unchanged, so it must already be preprocessed --
            presumably 3x224x224 and ImageNet-normalized; confirm against
            the training pipeline.
        batch_size: Number of images per forward pass. Defaults to 32.

    Returns:
        A float32 CPU tensor of shape ``(len(input_tensor), 2)``. Row ``i``
        holds the raw regressor output for ``input_tensor[i]``. The values
        are in the regressor's normalized units -- presumably
        (latitude, longitude); confirm before de-normalizing.
    """
    import torch

    num_images = len(input_tensor)
    if num_images == 0:
        # Nothing to predict: skip the downloads and model construction.
        return torch.zeros((0, 2))

    # Heavier dependencies are only needed once there is work to do.
    import json

    import torch.nn as nn
    import torchvision.models as models
    from huggingface_hub import hf_hub_download
    from torch.utils.data import DataLoader, Subset

    repo_id = "IanAndJohn/region_ensemble_model"
    num_regions = 7

    filenames = [
        "best region models/region_model_lr_0.0002_step_10_gamma_0.1_epochs_15.pth",
        "best region models/region_model_lr_0.00035_step_10_gamma_0.1_epochs_50.pth",
        "best region models/region_model_lr_0.0005_step_10_gamma_0.1_epochs_50.pth",
        "best region models/region_model_lr_0.0005_step_10_gamma_0.1_epochs_60.pth",
        "best region models/region_model_lr_0.002_step_10_gamma_0.1_epochs_100.pth",
        "best region models/model_histories.json",
        "region_ensemble_weights.json",
    ]
    filenames += [f"models/location_model_{i}.pth" for i in range(num_regions)]

    # Maps the repo-relative name (as referenced inside the JSON metadata)
    # to the locally cached file.
    path_map = {
        name: hf_hub_download(repo_id=repo_id, filename=name)
        for name in filenames
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def load_resnet18(checkpoint_path, make_head):
        """Build a ResNet-18 with a custom head and load its checkpoint."""
        network = models.resnet18()  # randomly initialised; weights come next
        network.fc = make_head(network.fc.in_features)
        # map_location="cpu" lets GPU-saved checkpoints load on CPU-only hosts.
        # NOTE(review): torch.load unpickles the file; only use trusted repos.
        state_dict = torch.load(checkpoint_path, map_location="cpu")
        network.load_state_dict(state_dict)
        return network.to(device).eval()

    def region_head(num_features):
        return nn.Sequential(nn.Dropout(0.5), nn.Linear(num_features, num_regions))

    def location_head(num_features):
        return nn.Linear(num_features, 2)

    with open(path_map["region_ensemble_weights.json"], "r") as file:
        # Kept as a tensor so per-model entries broadcast against the logits
        # whether they are scalars or per-class vectors.
        per_model_weights = torch.tensor(json.load(file))
    with open(path_map["best region models/model_histories.json"], "r") as file:
        # Each record's first field is the checkpoint name of one ensemble member.
        search_stats = json.load(file)

    # ---- Stage 1: region classification with the weighted ensemble ----
    # shuffle=False keeps batch rows aligned with input_tensor indices.
    region_logits = torch.zeros((num_images, num_regions))
    with torch.no_grad():
        for model_index, record in enumerate(search_stats):
            region_model = load_resnet18(path_map[record[0]], region_head)
            row = 0
            for images in DataLoader(input_tensor, batch_size=batch_size, shuffle=False):
                logits = region_model(images.to(device)).float().cpu()
                region_logits[row:row + len(logits)] += per_model_weights[model_index] * logits
                row += len(logits)
            # Only one ensemble member is resident at a time.
            del region_model
            torch.cuda.empty_cache()
    region_logits /= len(search_stats)
    _, predicted_regions = torch.max(region_logits, 1)

    # ---- Stage 2: per-region coordinate regression ----
    predictions = torch.zeros((num_images, 2))
    for region in range(num_regions):
        indices = torch.nonzero(predicted_regions == region).flatten().tolist()
        if not indices:
            continue  # no image was assigned to this region

        location_model = load_resnet18(
            path_map[f"models/location_model_{region}.pth"], location_head
        )
        region_loader = DataLoader(
            Subset(input_tensor, indices), batch_size=batch_size, shuffle=False
        )
        chunks = []
        with torch.no_grad():
            for images in region_loader:
                chunks.append(location_model(images.to(device)).float().cpu())

        # Scatter back so that output row i matches input_tensor[i].
        predictions[torch.tensor(indices)] = torch.cat(chunks)

        del location_model
        torch.cuda.empty_cache()

    return predictions
| # import matplotlib.pyplot as plt | |
| # from geopy.distance import geodesic | |
| # import seaborn as sns | |
| # print(pred_x) | |
| # print(pred_y) | |
| # print(actual_x) | |
| # print(actual_y) | |
| # print(pred_denorm_x) | |
| # print(pred_denorm_y) | |
| # print(actual_denorm_x) | |
| # print(actual_denorm_y) | |
| # plt.scatter(actual_denorm_y, actual_denorm_x, label='Actual', color='black', alpha=0.5) | |
| # # plt.scatter(all_preds_denorm[:, 1], all_preds_denorm[:, 0], label='Predicted', color='blue', alpha=0.5) | |
| # over100 = 0 | |
| # under100 = 0 | |
| # under50 = 0 | |
| # under25 = 0 | |
| # all_over100 = [] | |
| # all_under100 = [] | |
| # all_under50 = [] | |
| # all_under25 = [] | |
| # average_dist = 0.0 | |
| # dists = [] | |
| # for i in range(len(actual_denorm_x)): | |
| # pred_denorm_loc = (pred_denorm_x[i], pred_denorm_y[i]) | |
| # actual_denorm_loc = (actual_denorm_x[i], actual_denorm_y[i]) | |
| # dist = geodesic(actual_denorm_loc, pred_denorm_loc).meters | |
| # dists.append(dist) | |
| # if dist > 50: | |
| # over100 += 1 | |
| # all_over100.append(pred_denorm_loc) | |
| # elif dist > 25: | |
| # under100 += 1 | |
| # all_under100.append(pred_denorm_loc) | |
| # elif dist > 10: | |
| # under50 += 1 | |
| # all_under50.append(pred_denorm_loc) | |
| # else: | |
| # under25 += 1 | |
| # all_under25.append(pred_denorm_loc) | |
| # plt.plot( | |
| # [actual_denorm_y[i], pred_denorm_y[i]], | |
| # [actual_denorm_x[i], pred_denorm_x[i]], | |
| # color='grey', | |
| # alpha=0.5, | |
| # linewidth=0.5 | |
| # ) | |
| # dists = np.array(dists) | |
| # plt.scatter([y for x,y in all_over100], [x for x,y in all_over100], label=f'over 50m: {over100}', color='red', alpha=0.5) | |
| # plt.scatter([y for x,y in all_under100], [x for x,y in all_under100], label=f'under 50m: {under100}', color='orange', alpha=0.5) | |
| # plt.scatter([y for x,y in all_under50], [x for x,y in all_under50], label=f'under 25m: {under50}', color='green', alpha=0.5) | |
| # plt.scatter([y for x,y in all_under25], [x for x,y in all_under25], label=f'under 10m: {under25}', color='blue', alpha=0.5) | |
| # plt.legend() | |
| # plt.xlabel('Longitude') | |
| # plt.ylabel('Latitude') | |
| # plt.title('Actual vs. Predicted GPS Coordinates with Error Lines') | |
| # plt.show() | |
| # regions_enum = {0 : "fisher bennett", | |
| # 1 : "outer quad", | |
| # 2 : "outside football", | |
| # 3 : "chem building", | |
| # 4 : "top of walk", | |
| # 5 : "bottom of walk", | |
| # 6 : "chem courtyard", | |
| # 7 : "no assigned region"} | |
| # colors = {0:'red', | |
| # 1:'orange', | |
| # 2:'yellow', | |
| # 3:'green', | |
| # 4:'blue', | |
| # 5:'purple', | |
| # 6:'pink', | |
| # 7:'black'} | |
| # for i in range(len(actual_denorm_x)): | |
| # plt.plot( | |
| # [actual_denorm_y[i], pred_denorm_y[i]], | |
| # [actual_denorm_x[i], pred_denorm_x[i]], | |
| # color='grey', | |
| # alpha=0.25, | |
| # linewidth=0.5 | |
| # ) | |
| # # plt.scatter([p[0] for p in pts], [p[1] for p in pts], s=15, c=[colors[i] for i in all_regions], edgecolors='black') | |
| # colors_lst = [colors[i] for i in all_regions] | |
| # plt.scatter(actual_denorm_y, actual_denorm_x, label='Actual', color=colors_lst, alpha=0.5) | |
| # plt.scatter(pred_denorm_y, pred_denorm_x, label='Predicted', color=colors_lst, alpha=0.5) | |
| # # plt.gca().invert_xaxis() | |
| # plt.show() | |
| # # Plot the distribution | |
| # plt.figure(figsize=(10, 6)) | |
| # sns.histplot(dists, bins=30, kde=True, color='blue', alpha=0.7) | |
| # # Add labels and title | |
| # plt.title("Distribution of Geodesic Distances (Accuracy of Guesses)") | |
| # plt.xlabel("Geodesic Distance (meters)") | |
| # plt.ylabel("Frequency") | |
| # # Add mean and median lines for context | |
| # mean_distance = dists.mean() | |
| # median_distance = np.median(dists) | |
| # plt.axvline(mean_distance, color='red', linestyle='--', label=f'Mean: {mean_distance:.2f} meters') | |
| # plt.axvline(median_distance, color='green', linestyle='--', label=f'Median: {median_distance:.2f} meters') | |
| # plt.legend() | |
| # plt.grid(True) | |
| # plt.show() | |
| ``` |