Upload 21 files
- SightLinks-Dev-main/.DS_Store +0 -0
- SightLinks-Dev-main/DevelopmentTools/.DS_Store +0 -0
- SightLinks-Dev-main/DevelopmentTools/ClassUtils.py +64 -0
- SightLinks-Dev-main/DevelopmentTools/Classify.py +168 -0
- SightLinks-Dev-main/DevelopmentTools/DataUtils.py +39 -0
- SightLinks-Dev-main/DevelopmentTools/EvaluatePerformance.py +126 -0
- SightLinks-Dev-main/DevelopmentTools/LoadUtils.py +158 -0
- SightLinks-Dev-main/DevelopmentTools/MobileNetV3.py +117 -0
- SightLinks-Dev-main/DevelopmentTools/VGG_Custom.py +45 -0
- SightLinks-Dev-main/DevelopmentTools/VGG_TL.py +83 -0
- SightLinks-Dev-main/DevelopmentTools/automate_annotation.py +81 -0
- SightLinks-Dev-main/DevelopmentTools/experimental_resources/FeatureExtractionMethods.py +338 -0
- SightLinks-Dev-main/DevelopmentTools/experimental_resources/SelfSupervisedFeatureExtraction.py +137 -0
- SightLinks-Dev-main/DevelopmentTools/experimental_resources/quantisedMobileNet.py +352 -0
- SightLinks-Dev-main/LegacyVersion/ClassifierModel.py +54 -0
- SightLinks-Dev-main/LegacyVersion/ClassifierTraining.py +73 -0
- SightLinks-Dev-main/LegacyVersion/CrosswalkDataset.py +66 -0
- SightLinks-Dev-main/LegacyVersion/EvaluateModel.py +38 -0
- SightLinks-Dev-main/LegacyVersion/Utilities.py +34 -0
- SightLinks-Dev-main/README.md +63 -0
- SightLinks-Dev-main/requirements.txt +14 -0
SightLinks-Dev-main/.DS_Store
ADDED
Binary file (8.2 kB).
SightLinks-Dev-main/DevelopmentTools/.DS_Store
ADDED
Binary file (8.2 kB).
SightLinks-Dev-main/DevelopmentTools/ClassUtils.py
ADDED
@@ -0,0 +1,64 @@
# All the utilities required for the inference and further tuning of the classification models

import torch
from torchvision import transforms
from torch.utils.data import Dataset

import os
import numpy as np
from PIL import Image


# The custom dataset object I used to load the segmented image dataset - will only fit our format of data!!!
class CrosswalkDataset(Dataset):
    def __init__(self, src_dir, transform=None):
        self.src_dir = src_dir
        self.transform = transform

        dir_files = sorted(os.listdir(src_dir))
        self.image_paths = [file_path for file_path in dir_files if file_path.endswith((".png", ".jpg", ".jpeg"))]
        self.label_paths = [file_path for file_path in dir_files if file_path.endswith(".txt")]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = os.path.join(self.src_dir, self.image_paths[index])
        label_path = os.path.join(self.src_dir, self.label_paths[index])

        label = [0, 0]
        try:
            if np.array([int(open(label_path).read().strip())]) == 1:
                label = [1, 0]
            else:
                label = [0, 1]
        except:
            pass
        image = Image.open(image_path)

        if self.transform is None:
            self.transform = transforms.ToTensor()

        return (self.transform(image), torch.FloatTensor(label))


# Mean and Std. are chosen arbitrarily - need to be tuned
# The images do not have to be resized - the global pooling layer should technically deal with this, but I haven't tested this,
# so resizing prevents potential inaccuracies from occurring.
vgg_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.3, 0.3, 0.3])
])

res_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.3, 0.3, 0.3])
])

mob3_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
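A minimal usage sketch for the dataset and transforms above, assuming the segmented data directory produced by LoadUtils.py ("zebra_annotations/classification_data") already exists; the batch size here is illustrative:

from torch.utils.data import DataLoader

import ClassUtils

dataset = ClassUtils.CrosswalkDataset("zebra_annotations/classification_data",
                                      transform=ClassUtils.mob3_transform)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for images, labels in loader:
    # With mob3_transform each batch is [32, 3, 224, 224]; labels are one-hot pairs like [1, 0]
    print(images.shape, labels.shape)
    break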
SightLinks-Dev-main/DevelopmentTools/Classify.py
ADDED
@@ -0,0 +1,168 @@
import ClassUtils
import LoadUtils

import torch
import torchvision
import torchvision.models as models
import matplotlib.pyplot as plt
import numpy as np
import random

import warnings

# Torchvision's model utils have a deprecation warning for the pretrained parameter in their instantiation, but we don't use that
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'.*'
)

vgg16_state_path = "VGG16_Full_State_Dict.pth"
# A prototype v2 version that's only been trained on 2000 images by transfer learning
mobileNet_path = "MobileNetV3_state_dict_big_train.pth"
data_path = "zebra_annotations/classification_data"

classify = None
transform = None

# Loads a given VGG binary classifier state dictionary into a model, for transfer learning or immediate use
def load_vgg_classifier(state_dict_path):
    # Ignore deprecation warnings --> It works fine for our needs
    model = models.vgg16()

    # Modifies the fully connected layer to output binary class predictions
    model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)
    state_dict = torch.load(state_dict_path, weights_only=True)
    model.load_state_dict(state_dict)

    model.eval()

    return model

# Only loads the classifier weights, in the case where it is transfer learning on only the top
# or the feature extraction has been frozen during training
# This saves a significant amount of space
def partial_vgg_load(classifier_state_dict_path):
    model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)

    model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)
    classifier_state_dict = torch.load(classifier_state_dict_path, weights_only=True)
    model.classifier.load_state_dict(classifier_state_dict)

    model.eval()

    return model

# Loads a given ResNet binary classifier state dictionary into a model, for transfer learning or immediate use
def load_resnet_classifier(state_dict_path):
    # Ignore deprecation warnings --> It works fine for our needs
    resnet = models.resnet18(pretrained=True)
    resnet.fc = torch.nn.Linear(resnet.fc.in_features, 1)

    state_dict = torch.load(state_dict_path, weights_only=True)
    resnet.load_state_dict(state_dict)

    resnet.eval()
    return resnet

# Loads a given MN3 binary classifier state dictionary into a model, for transfer learning or immediate use
# We use this for our current version
def load_mobileNet_classifier(state_dict_path):
    # Ignore deprecation warnings --> It works fine for our needs
    model = models.mobilenet_v3_small()
    model.classifier[3] = torch.nn.Linear(model.classifier[3].in_features, 2)

    state_dict = torch.load(state_dict_path, weights_only=True)
    model.load_state_dict(state_dict)

    model.eval()
    return model

# classify = load_vgg_classifier(vgg16_state_path)
# transform = ClassUtils.vgg_transform

classify = load_mobileNet_classifier(mobileNet_path)
transform = ClassUtils.mob3_transform


# Takes an image (as a numpy array, PIL image or tensor) and returns the class probabilities from the classifier
def infer(image, infer_model=classify, infer_transform=transform):
    # If the inference model and transform have not been initialised, the function cannot run so the program will throw an error
    #
    # This was chosen over initialising default values since this way it is clear an error is occurring rather than being hidden
    # in a bunch of system logs and potentially continually causing errors
    if infer_model is None or infer_transform is None:
        raise TypeError("Error: The inference classes have not been initialised properly.")
    if not torch.is_tensor(image):
        image = infer_transform(image)

    # The model expects batches - this adds another dimension to properly format the data
    if len(image.shape) <= 3:
        image = image.unsqueeze(0)

    logit_pred = infer_model(image)

    probs = 1 / (1 + np.exp(-logit_pred.detach().numpy()))
    # prob = max(0, min(np.exp(logit_pred.detach().numpy())[0], 1))
    return probs


# Takes a PIL image and returns a boolean classification based on the given threshold value
# Defaults to 0.35 as it errs on the side of letting images through.
def PIL_infer(image, threshold=0.35):
    tensor_im = torchvision.transforms.functional.pil_to_tensor(image).float() / 255
    prediction = infer(tensor_im)
    classification = prediction[0][0] > threshold
    return classification

# For testing and demo purposes
def infer_and_display(image, threshold, actual_label, onlyWrong=False):
    probability = infer(image)
    prediction = probability > threshold
    is_correct = (actual_label[0] == 1) == prediction

    if onlyWrong and is_correct:
        return prediction

    plt.imshow(torch.permute(image, (1, 2, 0)).detach().numpy())
    plt.title(f"Prediction: {prediction[0][0]} with confidence {probability[0][0]}%, Actual: {actual_label[0] == 1}")
    plt.axis("off")
    plt.show()

    return probability


# Template code for how to set up an inference run
def example_init(examples=20, display=True):
    dataset = ClassUtils.CrosswalkDataset(data_path)

    random_points = [random.randint(0, len(dataset) - 1) for i in range(examples)]
    correct, incorrect, falsepos, falseneg = 0, 0, 0, 0
    for point in random_points:
        image, label = dataset[point]

        class_guess = [0, 1]
        if infer(image)[0][0] > 0.5:
            class_guess = [1, 0]
        if class_guess == label.tolist():
            correct += 1
        else:
            if class_guess[0]:
                falsepos += 1
            else:
                falseneg += 1
            incorrect += 1

        if display:
            print(f"Prediction of {infer_and_display(image, 0.4, label)}% of a crosswalk (Crosswalk: {label[0]==1})")
    print(f"correct: {correct}, incorrect: {incorrect}, of which false positives were {falsepos} and false negatives were {falseneg}")

if __name__ == "__main__":
    example_init(examples=200, display=False)

else:
    print(f"Module: [{__name__}] has been loaded")
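A hedged sketch of single-tile inference with the module above; it assumes the MobileNetV3 state dictionary referenced at module level is present, and "tile_example.png" is a placeholder path for one segmented aerial tile:

from PIL import Image

import Classify

tile = Image.open("tile_example.png").convert("RGB")  # placeholder tile path

# infer() returns class probabilities, PIL_infer() applies the 0.35 decision threshold
print(Classify.infer(tile))
print(Classify.PIL_infer(tile, threshold=0.35))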
SightLinks-Dev-main/DevelopmentTools/DataUtils.py
ADDED
@@ -0,0 +1,39 @@
from shapely.geometry import Polygon
import yaml
from pathlib import Path

# Each box has format (x_1, y_1, x_2, y_2) - this does mean this is an estimate.
# box_1 is the bounding box of the crosswalk, and box_2 is that of the tile.
def check_box_intersection(box_1, box_2, threshold=0.6):
    # Threshold is the min IoU required to consider it as having a crosswalk - the minimum percent area of the crosswalk that must be in the tile
    formatted_box_1 = [[box_1[0], box_1[1]], [box_1[2], box_1[1]], [box_1[2], box_1[3]], [box_1[0], box_1[3]]]  # Formatting follows shapely's clockwise system

    poly_1 = Polygon(formatted_box_1)
    poly_2 = Polygon(box_2)
    try:
        iou = poly_1.intersection(poly_2).area / poly_1.union(poly_2).area
        scaled_iou = iou * ((poly_1.area) / (poly_2.area))
        # print("--------------")
        # print(iou, scaled_iou)
        return (scaled_iou > threshold)
    except ZeroDivisionError:
        # In the case of a zero division error
        print("ZERO DIVISION ERROR", poly_1.area, poly_2.area)
        return False

# Loads a YAML dataset configuration onto the user's device
def load_yaml_database(yaml_path):
    config_file = None

    with open(yaml_path, "r") as file:
        config_file = yaml.safe_load(file)

    root = Path(config_file['path'])
    train_dir = root / config_file['train']  # Training data
    valid_dir = root / config_file['val']  # Validation data
    test_dir = root / config_file['test']  # Testing data
    image_size = config_file['img_size']
    classes = config_file['names']

    # Returns in format (directories, label_description)
    return (train_dir, valid_dir, test_dir), (image_size, classes)
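load_yaml_database above reads a YOLO-style configuration; a sketch of a matching file and call is below. The paths and class names are placeholders - only the keys (path, train, val, test, img_size, names) are fixed by the loader:

# Hypothetical dataset.yaml read by load_yaml_database:
#   path: /data/zebra_dataset
#   train: images/train
#   val: images/val
#   test: images/test
#   img_size: 640
#   names: ['crosswalk']
from DataUtils import load_yaml_database

(train_dir, valid_dir, test_dir), (image_size, classes) = load_yaml_database("dataset.yaml")
print(train_dir, valid_dir, test_dir)
print(image_size, classes)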
SightLinks-Dev-main/DevelopmentTools/EvaluatePerformance.py
ADDED
@@ -0,0 +1,126 @@
import time
import numpy as np

import Classify
# We import each model class inside of its respective test method to prevent the long load times of initialising all of them


# These are the default functions used for evaluation - overwrite or add more parameters as is required for your testing
# We did not use these for our testing - we calculated it manually - but this is a much easier way
class Results:
    def __init__(self):
        self.accuracy = None
        self.precision = None
        self.recall = None
        self.F1 = None

    # Proportion of all predictions that were right - basically what did it get right
    def calculate_accuracy(self, correct_pos, correct_neg, total):
        return (correct_pos + correct_neg) / total if total > 0 else 0

    # Proportion of all positive predictions that were actually positive - aka if it predicted positive, how often was it
    # actually right
    def calculate_precision(self, correct_pos, false_pos):
        return correct_pos / (correct_pos + false_pos) if (correct_pos + false_pos) > 0 else 0

    # Proportion of all positive cases that were predicted positive - aka how many positive images did it correctly predict
    def calculate_recall(self, correct_pos, false_neg):
        return correct_pos / (correct_pos + false_neg) if (correct_pos + false_neg) > 0 else 0

    # A combination of precision and recall that takes both of them into consideration - a decent 'summary' accuracy metric
    def calculateF1(self, precision, recall):
        return 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0


# Purely for comparing the performance of binary classification models
class Evaluate:
    def __init__(self):
        self.model = None
        self.dataLoader = None
        self.threshold = 0.5

    # GENERALISED EVALUATION FUNCTION FOR COMPARISON BETWEEN MODEL ARCHITECTURES - USE A SPECIFIC EVAL FUNC FOR TESTING TRAINING SETUPS
    # Tests performance of a model on unseen data - this is the function we used to evaluate our classification models during training
    # to determine the best model architecture to use.
    def run_test(self, verbose=True, visual=False):
        if self.model is None or self.dataLoader is None:
            raise AttributeError("Please choose a model to test before running the test")

        self.model.eval()

        total, correct_pos, correct_neg, false_pos, false_neg = 0, 0, 0, 0, 0
        running_average_time = 0.0
        collated_results = Results()
        incorrect = []

        for image, gt in self.dataLoader:
            current_start_time = time.time()
            prediction = Classify.infer(image, self.model)
            running_average_time += time.time() - current_start_time
            positive, negative = prediction[prediction[:, 0] > self.threshold], prediction[prediction[:, 0] <= self.threshold]
            positive_gt, negative_gt = gt[prediction[:, 0] > self.threshold], gt[prediction[:, 0] <= self.threshold]

            correct_pos += len(positive[positive_gt[:, 0] == 1])
            correct_neg += len(negative[negative_gt[:, 0] == 0])
            false_pos += len(positive[positive_gt[:, 0] == 0])
            false_neg += len(negative[negative_gt[:, 0] == 1])
            total += min(self.dataLoader.batch_size, len(image))

            false_pos_mask = (prediction[:, 0] > self.threshold) & (gt[:, 0].detach().numpy() == 0)
            false_neg_mask = (prediction[:, 0] < self.threshold) & (gt[:, 0].detach().numpy() == 1)

            if len(false_pos_mask) > 0:
                incorrect.append((image[false_pos_mask], gt[false_pos_mask]))
            if len(false_neg_mask) > 0:
                incorrect.append((image[false_neg_mask], gt[false_neg_mask]))

        if verbose:
            print(f"Total Images Processed: [{total}],"
                  f" \nAccuracy: [{((correct_pos + correct_neg) / total) * 100:.2f}%],"
                  f" \nCorrect Positives: [{correct_pos}], Correct Negatives: [{correct_neg}],"
                  f" \nFalse Positives: [{false_pos}], False Negatives [{false_neg}],"
                  f" \nAverage Running Time (s) per image: [{running_average_time / total}]")

        if visual and incorrect:
            for (img_set, lab_set) in incorrect:
                for (img, lab) in zip(img_set, lab_set):
                    if len(img) > 0:
                        Classify.infer_and_display(img, 0.5, lab)

        return (correct_pos, correct_neg, false_pos, false_neg, total)

    # What we used to test how changes in our training parameters and input data affected performance
    def test_MobileNet3_default(self, model_state_dict, test_num=1, verbose=True, visual=False) -> Results:
        import MobileNetV3 as mn3

        # We excluded the last 5% of data samples from training
        if test_num > len(mn3.dataset) * 0.05:
            test_num = int((len(mn3.dataset) - 1) * 0.05)

        test_loader = mn3.DataLoader(
            mn3.Subset(mn3.dataset, mn3.random.sample(list(range(int(len(mn3.dataset) * 0.95), len(mn3.dataset))), test_num)),
            batch_size=mn3.batch_size, shuffle=False)

        test_model = Classify.load_mobileNet_classifier(model_state_dict)

        self.model = test_model
        self.dataLoader = test_loader

        correct_pos, correct_neg, false_pos, false_neg, total = self.run_test(verbose=verbose, visual=visual)

        self.model = None
        self.dataLoader = None
        test_results = Results()

        test_results.accuracy = test_results.calculate_accuracy(correct_pos, correct_neg, total)
        test_results.precision = test_results.calculate_precision(correct_pos, false_pos)
        test_results.recall = test_results.calculate_recall(correct_pos, false_neg)
        test_results.F1 = test_results.calculateF1(test_results.precision, test_results.recall)

        return test_results


eval = Evaluate()
if __name__ == "__main__":
    mn3_test_results = eval.test_MobileNet3_default("MobileNetV3_state_dict_big_train.pth", test_num=10000, visual=True)
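The MobileNetV3-specific helper above wires everything up automatically; below is a hedged sketch of driving the generic Evaluate/Results classes directly with your own model and loader. The checkpoint and data paths are the ones used elsewhere in this upload and must exist:

from torch.utils.data import DataLoader

import ClassUtils
import Classify
import EvaluatePerformance

model = Classify.load_mobileNet_classifier("MobileNetV3_state_dict_big_train.pth")
dataset = ClassUtils.CrosswalkDataset("zebra_annotations/classification_data",
                                      transform=ClassUtils.mob3_transform)

evaluator = EvaluatePerformance.Evaluate()
evaluator.model = model
evaluator.dataLoader = DataLoader(dataset, batch_size=64, shuffle=False)

correct_pos, correct_neg, false_pos, false_neg, total = evaluator.run_test(verbose=True)

results = EvaluatePerformance.Results()
results.precision = results.calculate_precision(correct_pos, false_pos)
results.recall = results.calculate_recall(correct_pos, false_neg)
results.F1 = results.calculateF1(results.precision, results.recall)
print(results.precision, results.recall, results.F1)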
SightLinks-Dev-main/DevelopmentTools/LoadUtils.py
ADDED
@@ -0,0 +1,158 @@
import os
import yaml
from pathlib import Path
import shutil
from PIL import Image
import math
import numpy as np
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon as PltPolygon
import random

# Should eventually be moved to a settings class
zebra_labels = "zebra_annotations/txt_annotations"
zebra_images = "zebra_annotations/zebra_images"
save_dir = "zebra_annotations/classification_data"
segments = 4

# Each box has format (x_1, y_1, x_2, y_2) - this does mean this is an estimate.
# box_1 is the bounding box of the crosswalk, and box_2 is that of the tile.
def check_box_intersection(box_1, box_2, threshold=0.6):
    # Threshold is the min IoU required to consider it as having a crosswalk - the minimum percent area of the crosswalk that must be in the tile
    formatted_box_1 = [[box_1[0], box_1[1]], [box_1[2], box_1[1]], [box_1[2], box_1[3]], [box_1[0], box_1[3]]]  # Formatting follows shapely's clockwise system

    poly_1 = Polygon(formatted_box_1)
    poly_2 = Polygon(box_2)
    try:
        iou = poly_1.intersection(poly_2).area / poly_1.union(poly_2).area
        scaled_iou = iou * ((poly_1.area) / (poly_2.area))
        # print("--------------")
        # print(iou, scaled_iou)
        return (scaled_iou > threshold)
    except ZeroDivisionError:
        # In the case of a zero division error
        print("ZERO DIVISION ERROR", poly_1.area, poly_2.area)
        return False

def load_yaml_database(yaml_path):
    config_file = None

    with open(yaml_path, "r") as file:
        config_file = yaml.safe_load(file)

    root = Path(config_file['path'])
    train_dir = root / config_file['train']  # Training data
    valid_dir = root / config_file['val']  # Validation data
    test_dir = root / config_file['test']  # Testing data
    image_size = config_file['img_size']
    classes = config_file['names']

    # Returns in format (directories, label_description)
    return (train_dir, valid_dir, test_dir), (image_size, classes)


# Takes in an entity database, labelled for bounding box regression, and breaks down the images into
# smaller images, with labels for classification training
def convert_database_to_segments(image_dir, label_dir, dst_dir, overwrite=False):
    if not os.path.exists(image_dir) or not os.path.exists(label_dir):
        print("Error: Image or label directories do not exist")
        return

    # If there isn't a directory created to save the new data to, create it
    dst_dir = Path(dst_dir)
    try:
        if overwrite and dst_dir.exists() and dst_dir.is_dir():
            shutil.rmtree(dst_dir)
            dst_dir.mkdir(parents=True, exist_ok=True)
        else:
            dst_dir.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        # If the conversion has already been made, don't do anything
        return

    # Some images are unlabelled and will be ignored
    image_files = {Path(f).stem for f in os.listdir(image_dir) if f.endswith(('.jpg', '.jpeg', '.png'))}

    file_base = 0
    iterations = 0

    for label in os.listdir(label_dir)[1:]:
        if label.endswith('.txt'):
            image_name = Path(label).stem

            if image_name in image_files:
                image_path = os.path.join(image_dir, image_name)
                label_path = os.path.join(label_dir, label)
                # Saves the broken down segments to the destination directory - no need to return
                file_base = breakdown(image_path, label_path, dst_dir, file_base)

                if iterations < 10:
                    print(file_base, end=" ")  # To make sure that it is progressing - a progress bar of sorts
                    iterations += 1
                else:
                    print(file_base, end='\r')  # This doesn't work on the VS Code terminal unfortunately
                    iterations = 0

def breakdown(image_path, label, dst_dir, file_base, segment=segments, targ_size=None):
    with Image.open(image_path + ".jpg") as image:
        img_size, take_size = image.size[0], None

        # Can segment images by quantity (how many images you want) or by size (how large you want the images to be)
        if targ_size is None:
            take_size = math.floor(img_size / segment)

        else:
            segment = math.floor(img_size / targ_size)
            take_size = math.floor(img_size / segment)

        # Particular to the Zebra label format
        label_data = []
        with open(label, 'r') as label_file:
            for line in label_file.readlines():
                if line[0] == '0':
                    parsed = list(map(float, line.split()[1:]))
                    entity_box = np.array([(parsed[i], parsed[i + 1]) for i in range(0, len(parsed), 2)])
                    label_data.append(entity_box * img_size)


        img = np.array(image)
        for i in range(segment):
            for j in range(segment):
                box_coordinates = [i * take_size, j * take_size, min((i + 1) * take_size, len(img[0])), min((j + 1) * take_size, len(img))]
                new_img = img[box_coordinates[1]: box_coordinates[3], box_coordinates[0]: box_coordinates[2]]

                # Some images do not load so have size zero, which causes crashes when loading them from the database during training
                # because tensor conversions cannot take empty lists. For that reason we threshold at an arbitrarily low number here.
                if new_img.size <= 32:
                    continue

                new_image = Image.fromarray(new_img)

                crosswalk_intersection = False
                for crosswalk_box in label_data:
                    if check_box_intersection(box_coordinates, crosswalk_box):
                        # 1 means an image that contains a crosswalk (a significant portion of it)
                        crosswalk_intersection = True
                    else:
                        # 0 means background image (does not contain any significant portion of a crosswalk)
                        pass

                if crosswalk_intersection:
                    with open(os.path.join(dst_dir, str(file_base)) + ".txt", 'w') as new_label_file:
                        new_label_file.write("1")
                    new_image.save(str(os.path.join(dst_dir, str(file_base))) + ".png")

                # As the crosswalks are sparse - this improves the balance of positive to negative cases for training
                else:
                    if random.randint(0, 4) >= 1:
                        with open(os.path.join(dst_dir, str(file_base)) + ".txt", 'w') as new_label_file:
                            new_label_file.write("0")
                        new_image.save(str(os.path.join(dst_dir, str(file_base))) + ".png")

                # This ensures we don't overwrite previous segment files
                file_base += 1

        return file_base

convert_database_to_segments("zebra_annotations/zebra_images", "zebra_annotations/txt_annotations", "zebra_annotations/classification_data")
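The module above runs the zebra_annotations conversion once at import time; below is a hedged sketch of re-running the segmentation step on a different annotation dump. The directory names are placeholders, and overwrite=True clears any previous conversion:

import LoadUtils

# Placeholder directories: a folder of .jpg images plus matching YOLO-style .txt labels
LoadUtils.convert_database_to_segments(
    image_dir="my_dataset/images",
    label_dir="my_dataset/labels",
    dst_dir="my_dataset/classification_data",
    overwrite=True,  # remove and rebuild an existing conversion
)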
SightLinks-Dev-main/DevelopmentTools/MobileNetV3.py
ADDED
@@ -0,0 +1,117 @@
# This is unquantised - for comparison

import ClassUtils
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset
import random
from torchvision import models, transforms
from torch.utils.data import DataLoader
import time

import matplotlib.pyplot as plt
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# These define the model that will be trained
num_classes = 2
batch_size = 256
epochs = 25
learning_rate = 5e-4
train_data_size = 25000
saved_state_dict_path = "MobileNetV3_test.pth"

model = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.DEFAULT)

model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)
model = model.to(device)

dataset = ClassUtils.CrosswalkDataset("zebra_annotations/classification_data")

train_loader = DataLoader(
    Subset(dataset, random.sample(list(range(0, int(len(dataset) * 0.95))), train_data_size)),
    batch_size=batch_size, shuffle=True)
test_loader = DataLoader(
    Subset(dataset, random.sample(list(range(int(len(dataset) * 0.95), len(dataset))), 12)),
    batch_size=batch_size, shuffle=False)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


# Generalised training function that uses the training-testing split defined in the training variables above.
# Works best with transfer learning, as in the pretrained-weights setup defined at the top of this file.
def train_model():
    model.train()
    start_time = time.time()
    for epoch in range(epochs):
        to_do = train_data_size
        running_loss = 0.0
        for inputs, labels in train_loader:
            try:
                inputs, labels = inputs.to(device), labels.to(device)
            except:
                continue

            optimizer.zero_grad()
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            to_do -= batch_size

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}, time {time.time() - start_time}")
        start_time = time.time()

# Do not use to actually evaluate performance, this is for quick checks - 'EvaluatePerformance.py' has the actual quantified evaluation tools.
def test_model():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            try:
                inputs, labels = inputs.to(device), labels.to(device)
            except:
                continue
            outputs = torch.sigmoid(model(inputs))

            predicted = outputs > 0.5
            for i in range(len(inputs)):
                plt.close()
                plt.imshow(torch.permute(inputs[i], (1, 2, 0)).cpu().detach().numpy())
                plt.title(f"prediction of {100 * outputs[i].tolist()[0]:.3f}%,\nactual: {labels[i].tolist()}")
                plt.axis("off")
                plt.show()

            total += labels.size(0)
            # print(predicted, labels)

            for prediction, label in zip(predicted, labels):
                correct += int(prediction[0] == label[0])

    print(f"Accuracy: {100 * correct / total}%")



train = True
if __name__ == "__main__":
    if train:
        train_model()
        torch.save(model.state_dict(), "mn3_vs55.pth")
    else:
        state_dictionairy = torch.load(saved_state_dict_path, weights_only=True)
        print(type(state_dictionairy))
        model.load_state_dict(state_dictionairy)

    test_model()

else:
    state_dictionairy = torch.load(saved_state_dict_path, weights_only=True)
    model.load_state_dict(state_dictionairy)
    print(f"Module: [{__name__}] has been loaded")
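The script above fine-tunes every weight; if you only want to retrain the classification head (the transfer-learning setup its comments refer to), a hedged sketch of freezing the feature extractor before training is below. It assumes the segmented dataset and the MobileNetV3_test.pth checkpoint that the module touches at import time are available:

import torch.optim as optim

import MobileNetV3 as mn3

# Freeze the pretrained feature extractor so only the classifier head is updated
for param in mn3.model.features.parameters():
    param.requires_grad = False

# Rebuild the optimiser over the remaining trainable parameters
mn3.optimizer = optim.Adam(filter(lambda p: p.requires_grad, mn3.model.parameters()),
                           lr=mn3.learning_rate)

mn3.train_model()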
SightLinks-Dev-main/DevelopmentTools/VGG_Custom.py
ADDED
@@ -0,0 +1,45 @@
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data.sampler import SubsetRandomSampler


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# This is the start of a VGG-16-style architecture (16 layers) - just for reference or if you want to build on top of it
class VGG16(nn.Module):
    def __init__(self, num_classes=2):
        # Has two classes, crosswalk or background.
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer6 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
SightLinks-Dev-main/DevelopmentTools/VGG_TL.py
ADDED
@@ -0,0 +1,83 @@
import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import ImageFolder
from ClassUtils import CrosswalkDataset
import numpy as np
import random
import time


import warnings
# Torchvision's model utils have a deprecation warning for the pretrained parameter in their instantiation, but we don't use that
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'.*'
)

# In a later version, this could be moved to a configuration file.
learning_rate = 4e-3
epoch_num = 25

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vgg16 = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
# Modifies the fully connected layer to output binary class predictions
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, 2)

# Freeze as you see fit depending on the application you want to design
# for param in vgg16.features.parameters():
#     param.requires_grad = False
# for param in vgg16.classifier[:6].parameters():
#     param.requires_grad = False

vgg16 = vgg16.to(device)
loss_function = nn.BCELoss()

# Prevents accidental loading of the whole training process in the background
if __name__ == "__main__":
    # Takes only the parameters which have not been frozen
    optimiser = torch.optim.Adam(params=
        filter(lambda p: p.requires_grad, vgg16.parameters()),
        lr=learning_rate)

    training_dataset = CrosswalkDataset("zebra_annotations/classification_data")
    training_loader = DataLoader(Subset(training_dataset, random.sample(range(len(training_dataset) - 1), 25000)), batch_size=128, shuffle=True)

    for param in vgg16.features.parameters():
        param.requires_grad = False


    vgg16.train()
    print(len(training_dataset))
    for epoch in range(epoch_num):
        running_loss = 0.0
        start_time = time.time()
        last_time = start_time
        for images, gt in training_loader:
            images, gt = images.to(device), gt.to(device)

            classifications = torch.sigmoid(vgg16(images))
            loss = loss_function(classifications, gt)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()

            batch_time = time.time()

            running_loss += loss.item()

            last_time = batch_time
            print(",,, ---")


        print(f"\nEpoch {epoch + 1} of {epoch_num} has a per image loss of [{running_loss/len(training_loader):.4f}]")
        print(f"{(last_time - start_time):.6f}")

    # Includes the feature extraction layers
    torch.save(vgg16.state_dict(), "VGG16_Full_State_Dict.pth")
    # Only includes the classifier layer
    # - the 'head' whose weights you can use to overwrite if you don't want to store the whole state dict file
    torch.save(vgg16.classifier[6].state_dict(), "vgg16_binary_classifier_onlyHead.pth")
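Once the run above has written VGG16_Full_State_Dict.pth, the fine-tuned model can be reloaded for inference through the helper in Classify.py (mirroring the commented-out lines there); a hedged sketch, with "tile_example.png" as a placeholder tile path:

from PIL import Image

import ClassUtils
import Classify

vgg_model = Classify.load_vgg_classifier("VGG16_Full_State_Dict.pth")

tile = Image.open("tile_example.png").convert("RGB")  # placeholder tile path
print(Classify.infer(tile, infer_model=vgg_model, infer_transform=ClassUtils.vgg_transform))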
SightLinks-Dev-main/DevelopmentTools/automate_annotation.py
ADDED
@@ -0,0 +1,81 @@
# A set of functions to automatically annotate images for training a crosswalk classification model. Requires a paid API key for the images, but tile locations are free to access.
import overpy
from geopy.geocoders import Nominatim
import requests

MAPBOX_API_KEY = None
crosswalk_images_folder = "crosswalk_images"

# Returns a list of crosswalk locations based on a given location geo-code, such as "Bloomsbury, London, UK"
# Geo-codes for a location may differ from the common name for a location, so check before putting in a location
def get_cross_walk_location(location_geo_code, bounds=0.001):
    over_api = overpy.Overpass()
    geo_locator = Nominatim(user_agent="crosswalk_locator")
    location = geo_locator.geocode(location_geo_code)

    if not location:
        raise ValueError("This location could not be geocoded - try an alternative name?")

    geo_bounding_box = (location.latitude - bounds, location.longitude - bounds,
                        location.latitude + bounds, location.longitude + bounds)  # South, West, North, East

    query = f"""
    [out:json][timeout:25];
    (
      node["highway"="crossing"]({geo_bounding_box[0]},{geo_bounding_box[1]},{geo_bounding_box[2]},{geo_bounding_box[3]});
    );
    out body;
    """  # Should return all crosswalks within {bounds} longitude/latitude points, in a square shape

    result = over_api.query(query)

    crosswalks = []
    for node in result.nodes:
        crosswalks.append((node.lat, node.lon))
        # Now we have a list of all the (latitude, longitude) pairs in that area,
        # and can use it to access the images of those areas for annotation

    return crosswalks


# Gets the aerial image associated with a particular longitude and latitude.
# Uses the Mapbox API, which is a cheaper alternative to the Google Maps static tiles API, but has lower-resolution images.
def get_mapbox_aerial_image(location, output_file_name, zoom=18, size="256x256", style="satellite-v9",
                            api_key=MAPBOX_API_KEY):
    longitude, latitude = location  # We split here, so it's easier to map this function onto a set of (lon, lat) coords
    base_url = f"https://api.mapbox.com/styles/v1/mapbox/{style}/static"

    coords = f"{latitude},{longitude},{zoom}"
    url = f"{base_url}/{coords}/{size}?access_token={api_key}"

    response = requests.get(url)

    if response.status_code == 200:
        with open(output_file_name, "wb") as file:
            file.write(response.content)
        print(f"Retrieved aerial image and saved it as {output_file_name}")

    else:
        print("Error, Error, Error!", response.status_code, response.text)


# Saves a list of crosswalk locations for the given geo-code to a folder, and returns the names of the saved files
# This function executes the setup required for training a classification model requiring 256x256 images, as ours did
# If you want to change the model you are training, adjust the get_mapbox_aerial_image function
def get_crosswalk_images(geo_code, folder=crosswalk_images_folder, file_extension="png"):
    crosswalk_image_files = []
    crosswalk_set = get_cross_walk_location(geo_code, bounds=0.1)
    for crosswalk in crosswalk_set:
        formatted_crosswalk = list(map(float, crosswalk))
        stored_file_name = str(formatted_crosswalk)
        try:
            get_mapbox_aerial_image(formatted_crosswalk,
                                    f"{folder}/{formatted_crosswalk}.{file_extension}")
            crosswalk_image_files.append(stored_file_name)
        except Exception as e:
            print(e)

    return crosswalk_image_files


# print(len(get_cross_walk_location("Bloomsbury, London, UK")))
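A hedged sketch of the annotation flow above: the crossing lookup is free, while the imagery request needs a real Mapbox token ("pk.your_mapbox_token_here" is a placeholder); the geocode string follows the same form as the commented-out Bloomsbury example:

import automate_annotation as aa

crosswalks = aa.get_cross_walk_location("Bloomsbury, London, UK", bounds=0.01)
print(len(crosswalks), "crossing nodes found")

if crosswalks:
    lat, lon = map(float, crosswalks[0])
    # Pass the token explicitly; the module-level MAPBOX_API_KEY default is None
    aa.get_mapbox_aerial_image((lat, lon), "example_crosswalk.png",
                               api_key="pk.your_mapbox_token_here")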
SightLinks-Dev-main/DevelopmentTools/experimental_resources/FeatureExtractionMethods.py
ADDED
@@ -0,0 +1,338 @@
# Feature Extraction Experimental Methods - Our method actually does not significantly benefit from
# common preprocessing techniques, so we created a set of more uncommon/niche methods as well as some
# other untested methods to experiment with which features best represent the crosswalk features.

# This hopefully should be of great use to anyone trying to train their own specialised classifier using
# our pipeline - hopefully one of these works for you!

# All these methods work on a numpy array - if you want to use them, convert your image to numpy first
# Most of the methods assume the channels of the image are at the end

import numpy as np
from scipy.signal import convolve2d
import skimage.feature as skf
from cv2 import NMSBoxes

from scipy.ndimage import maximum_filter
from collections import deque



# Was required for keeping several of the methods optimised.
def matrixConvolution(image, kernel, padding=True):
    convolvedImage = np.zeros_like(image)

    # Again, this assumes the channels are placed at the end of the image
    for c in range(image.shape[2]):
        convolvedImage[..., c] = convolve2d(image[..., c], kernel, mode='same', boundary='wrap')

    return convolvedImage


# Different, more traditional image processing methods; these may not be of particular use in our project,
# but could be useful for other developers using this pipeline.
class ImageProcessingFeatures:
    def __init__(self):
        self.gaussianKernel = self.generateGaussianKernel(5, 1)

    # Converts an image to grayscale - but using binary thresholding of the averaged colour channels
    # Image in format [n, m, k] where k is the channels and n, m are the dimensions
    def binaryThresholding(self, image, threshold):

        averaged = np.mean(image, axis=2)
        thresholded = averaged > threshold

        return thresholded

    # A bit unintuitively named, but this takes an already grayscale image and applies binary thresholding to it
    def grayscaleBinaryThresholding(self, grayscaleImage, threshold):
        return grayscaleImage > threshold


    # Converts an image to grayscale - it has several possible schemas it can use but defaults to the lightness method
    def grayscaleConversion(self, image, schema="lightness"):
        # https://tannerhelland.com/2011/10/01/grayscale-image-algorithm-vb6.html
        grayImage = np.zeros(np.shape(image)[:-1])

        if schema == "average":
            grayImage = np.mean(image, axis=2)

        if schema == "lightness":
            # (max(R, G, B) + min(R, G, B)) / 2 --> Sometimes called desaturation
            grayImage = (np.max(image, axis=2) + np.min(image, axis=2)) / 2

        if schema == "luma":
            # I assume RGB colour ordering in the image here -- feel free to overwrite in your implementation
            # (Red * 0.2126 + Green * 0.7152 + Blue * 0.0722)
            colourWeighting = np.array([0.2126, 0.7152, 0.0722])
            grayImage = np.dot(image[..., :3], colourWeighting)

        if schema == "decomposition":
            # This is a maximum decomposition - minimum decomposition can be implemented by just switching max for min...
            grayImage = np.max(image, axis=2)

        return grayImage


    # Sharpens the image by convolving a preset Laplace kernel with it
    # My chosen kernel is a pretty standard one that takes the corners into consideration - the 8-adjacencies one
    # [-1, -1, -1], [-1, 8, -1], [-1, -1, -1]
    def laplaceTransform(self, image):
        laplaceKernel = [[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]]
        convolvedImage = None

        # RGB image
        if len(np.shape(image)) == 3:
            convolvedImage = matrixConvolution(image, laplaceKernel)

        # Grayscale image
        if len(np.shape(image)) == 2:
            convolvedImage = convolve2d(image, laplaceKernel, mode='same', boundary='wrap')

        return convolvedImage

    # Assumes grayscale images
    # https://en.wikipedia.org/wiki/Sobel_operator - naive implementation by me, can probably be massively improved
    # Basically takes the vertical and horizontal gradients using convolution with a Sobel operator and combines them.
    def sobelConvolution(self, image):
        # Gx
        verticalKernel = [[-1, 0, 1],
                          [-2, 0, 2],
                          [-1, 0, 1]]

        # Gy
        horizontalKernel = [[-1, -2, -1],
                            [0, 0, 0],
                            [1, 2, 1]]

        horizontalGrad = convolve2d(image, horizontalKernel, mode='same', boundary='wrap')
        verticalGrad = convolve2d(image, verticalKernel, mode='same', boundary='wrap')

        sobelConvolvedImage = np.sqrt(np.square(horizontalGrad) + np.square(verticalGrad))

        # You could go further to find the gradient direction by calculating the angle with some trigonometry - atan2(Gy, Gx)
        return sobelConvolvedImage

    # Part of the calculations for Canny edge detection, a tri-threshold operation. Requires grayscale images.
    # Classifies into strong edges and weak edges based on pixel intensity
    def doubleFiltering(self, image, weakThreshold=75, strongThreshold=200):
        weak, strong = 125, 255

        strong_edges = image >= strongThreshold
        weak_edges = (image >= weakThreshold) & (image < strongThreshold)

        result = np.zeros_like(image, dtype=np.uint8)
        result[strong_edges] = strong
        result[weak_edges] = weak

        return result, strong, weak

    # Technical name: edge tracking by hysteresis. This is an 'optimised' deque approach (to the best of my ability)
    def followEdges(self, weakEdges, strongEdges):
        h, w = weakEdges.shape
        directions = [(-1, -1), (-1, 0), (0, -1), (1, -1), (-1, 1), (0, 0), (0, 1), (1, 0), (1, 1)]
        finalEdges = (strongEdges.copy() > 0)  # must include strong edges, it's guaranteed, and we want a binary output.
        edgeQueue = deque(np.argwhere(strongEdges == 1))

        while edgeQueue:
            y, x = edgeQueue.popleft()
            for dy, dx in directions:
                ny, nx = y + dy, x + dx

                # Expand the branch if it's connected to a weak edge, else kill this branch
                if 0 <= ny < h and 0 <= nx < w and weakEdges[ny, nx] > 0:
                    weakEdges[ny, nx] = 0
                    finalEdges[ny, nx] = 1
                    edgeQueue.append((ny, nx))

        return finalEdges


    # Much better than Sobel in terms of accuracy and removing false edges produced due to noise, but slower.
    # https://en.wikipedia.org/wiki/Canny_edge_detector + a lot of ChatGPT prompts
    def cannyEdgeDetection(self, image):
        # Ensures that the image is in grayscale so we don't have any issues.
        if len(np.shape(image)) == 3:
            image = self.grayscaleConversion(image, schema="average")

        # Takes a grayscale image and returns a set of strong and weak edges (see the Wikipedia page)
        blurred = convolve2d(image, self.gaussianKernel, mode='same', boundary='wrap')
        gradientMagnitude = self.sobelConvolution(blurred)
        maxFiltered = maximum_filter(gradientMagnitude, size=3, mode='constant')
        suppressed = np.where(gradientMagnitude == maxFiltered, gradientMagnitude, 0)
        thresholded, strong, weak = self.doubleFiltering(suppressed)

        # The weak and strong edge masks are derived from the thresholded map before edge tracking
        final_edges = self.followEdges(thresholded == weak, thresholded == strong)
        # We expand the definition of strong edges to include weak edges that are adjacent to strong edges

        return final_edges


    # Convolve this with the image to produce a Gaussian blur effect
    # In a real application you should precompute this, it's really unoptimised
    def generateGaussianKernel(self, size, sigma):
        center = size // 2
        kernel = np.zeros((size, size))

        for i in range(size):
            for j in range(size):
                sides = np.sqrt((i - center) ** 2 + (j - center) ** 2)
                kernel[i, j] = np.exp(-(sides ** 2) / (2 * sigma ** 2))  # According to the formula

        return kernel / np.sum(kernel)


    # This should be applied to a grayscale image!
    def differenceOfGaussians(self, image, size=5, sigmaOne=1, sigmaTwo=1.5, brightFeatureFocus=True):
        # This by default uses a 5x5 kernel with sigmaOne = 1 and sigmaTwo = 1.5. These are not finetuned values but follow the general
        # principle that sigma should not be greater than approx. 3*dims. Be careful to consider the relationship between the dimension
        # and sigma values (and between the two sigmas themselves) to preserve the Gaussian property!
        kernelOne, kernelTwo = self.generateGaussianKernel(size, sigmaOne), self.generateGaussianKernel(size, sigmaTwo)

        primaryImage = convolve2d(image, kernelOne, mode='same', boundary='wrap')
        backgroundImage = convolve2d(image, kernelTwo, mode='same', boundary='wrap')

        diffOfGaussians = primaryImage - backgroundImage

        # Enhances bright feature edges (our focus in crosswalks), but this is a thing that might vary based on your focus. Can be disabled!
        if brightFeatureFocus:
            diffOfGaussians = diffOfGaussians[diffOfGaussians > 0]

        return diffOfGaussians


# Different feature extraction methods that attempt to quantify the complexity in an image.
# Different feature extraction methods that attempt to quantify the complexity in an image.
|
| 205 |
+
# Generally seperated into local complexity per region, and complexity of an image as a whole.
|
| 206 |
+
# Intended to potentially detect the occlusion in an image, e.g. more treees --> more complexity hopefully.
|
| 207 |
+
class ComplexityFeatures:
|
| 208 |
+
def __init__(self):
|
| 209 |
+
pass
|
| 210 |
+
|
| 211 |
+
# Definition: Mathematical measure of how complex an image or pattern is
|
| 212 |
+
# In our case (Grayscale image), it measures how much the detail in an image changes with the scale it is perceived at.
|
| 213 |
+
# The particular method we will the basic box-counting method
|
| 214 |
+
# Our definition of a useful feature map is a binary-thresholded line detection method using difference of Gaussians
|
| 215 |
+
def fractalDimension(self, image, minimumBoxSize=2, imageStructureThreshold=0.9):
|
| 216 |
+
|
| 217 |
+
if len(image.shape) > 2:
|
| 218 |
+
image = ImageProcessingFeatures.grayscaleConversion(image, schema="average")
|
| 219 |
+
|
| 220 |
+
image = ImageProcessingFeatures.sobelConvolution(image) # Could also go for DoG or Laplace too - most edge detectors work
|
| 221 |
+
image = np.int_(255 * (image - np.min(image)) / (np.max(image) - np.min(image)))
|
| 222 |
+
image = ImageProcessingFeatures.grayscaleBinaryThresholding(image, imageStructureThreshold)
|
| 223 |
+
|
| 224 |
+
N, M = image.shape
|
| 225 |
+
|
| 226 |
+
startingBoxSize = min(N, M) // 4
|
| 227 |
+
boxSizes = [s for s in range(minimumBoxSize, startingBoxSize + 1, 2) if min(N, M) % s == 0]
|
| 228 |
+
logSizes, logCounts = [], []
|
| 229 |
+
|
| 230 |
+
for boxSize in boxSizes:
|
| 231 |
+
numBoxes = 0
|
| 232 |
+
|
| 233 |
+
for i in range(0, N, boxSize):
|
| 234 |
+
for j in range(0, M, boxSize):
|
| 235 |
+
if np.any(image[i:i+boxSize, j:j+boxSize]): # Check if box contains a 1 (part of the image structure)
|
| 236 |
+
numBoxes += 1
|
| 237 |
+
|
| 238 |
+
if numBoxes > 0:
|
| 239 |
+
logSizes.append(np.log(1.0 / boxSizes))
|
| 240 |
+
logCounts.append(np.log(numBoxes))
|
| 241 |
+
|
| 242 |
+
slope, _ = np.polyfit(logSizes, logCounts, 1)
|
| 243 |
+
# Finds the relationship of the sizes and the image structure contained as the image scale decreases
|
| 244 |
+
|
| 245 |
+
return slope
|
| 246 |
+
|
| 247 |
+
# Come back to
|
| 248 |
+
def waveletBasedFractalTransform(self, image):
|
| 249 |
+
pass
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
# A set of feature extraction methods inspired by texture analysis methods
|
| 253 |
+
# just a wild throw in the dark, not sure how useful these could be as a feature
|
| 254 |
+
# Many of these should be applied in sliding window approaches or in regions, or to the whole image if you have a feature vector
|
| 255 |
+
class TextureFeatures:
|
| 256 |
+
def __init__(self):
|
| 257 |
+
pass
|
| 258 |
+
|
| 259 |
+
def lbpCompare(self, threshold, value):
|
| 260 |
+
return 0 if value < threshold else 1
|
| 261 |
+
|
| 262 |
+
# A matter of personal preference, but this is a spiral concatenation for the local binary pattern signature generation
|
| 263 |
+
# You could do row by row, but this is my preferred method. Feel free to overwrite, it shouldn't make a difference as long as you're consistent
|
| 264 |
+
def spiral_concatenation(self, vals, dims):
|
| 265 |
+
summed = []
|
| 266 |
+
dirs = [(0, 1), (1, 0), (0, -1), (-1, 0)]
|
| 267 |
+
cur_dir = 0
|
| 268 |
+
cur_x, cur_y = 0, 0
|
| 269 |
+
bounds = dims
|
| 270 |
+
|
| 271 |
+
while bounds > 0:
|
| 272 |
+
for i in range(bounds):
|
| 273 |
+
summed.append(str(vals[cur_y * dims + cur_x]))
|
| 274 |
+
cur_y, cur_x = cur_y + dirs[cur_dir][0], cur_x + dirs[cur_dir][1]
|
| 275 |
+
cur_dir = (cur_dir + 1) % 4
|
| 276 |
+
|
| 277 |
+
bounds -= 1
|
| 278 |
+
|
| 279 |
+
if bounds > 0:
|
| 280 |
+
for i in range(bounds):
|
| 281 |
+
summed.append(str(vals[cur_y * dims + cur_x]))
|
| 282 |
+
cur_y, cur_x = cur_y + dirs[cur_dir][0], cur_x + dirs[cur_dir][1]
|
| 283 |
+
cur_dir = (cur_dir + 1) % 4
|
| 284 |
+
bounds -= 1
|
| 285 |
+
|
| 286 |
+
return "".join(summed)
|
| 287 |
+
|
| 288 |
+
# Basically captures the local binary changes in texture in an image - a potentially useful feature for our crosswalk detector that
|
| 289 |
+
# works on a very similar principle with the local regions of interest found by the first.
|
| 290 |
+
# It generates a signature for each local region that can be used to compare them quite easily in applications like texture analysis.
|
| 291 |
+
# CAREFUL - THIS IS A STRING FEATURE, NOT A NUMERICAL ONE
|
| 292 |
+
def localBinaryPattern(self, image, dims):
|
| 293 |
+
imgWidth, imgLength = len(image[0]), len(image)
|
| 294 |
+
edge = dims // 2
|
| 295 |
+
lbpList = []
|
| 296 |
+
for row in range(edge, imgLength - edge):
|
| 297 |
+
for pixel in range(edge, imgWidth - edge):
|
| 298 |
+
neighborhood = image[row - edge: row + edge + 1, pixel - edge: pixel + edge + 1]
|
| 299 |
+
|
| 300 |
+
centralPixel = image[row][pixel]
|
| 301 |
+
binaryVals = [self.lbpCompare(centralPixel, val) for val in neighborhood.flatten()]
|
| 302 |
+
|
| 303 |
+
lbpSignature = self.spiral_concatenation(binaryVals, dims)
|
| 304 |
+
lbpList.append(lbpSignature)
|
| 305 |
+
|
| 306 |
+
return lbpList
|
| 307 |
+
|
| 308 |
+
# https://medium.com/@girishajmera/feature-extraction-of-images-using-glcm-gray-level-cooccurrence-matrix-e4bda8729498
|
| 309 |
+
# Link above explains the function quite well and succinctly. This takes in both coloured (n, m, k) and grayscale arrays (n, m).
|
| 310 |
+
# Captures the spatial relationships between neighbouring gray levels/ Intensities
|
| 311 |
+
def grayLevelCoOccurenceMatrix(self, image, pixelOffset=5, preserveMatrix=False):
|
| 312 |
+
if not (np.shape(image) == 2):
|
| 313 |
+
# P.S - this assumes that RGB and [n, m, k] formats are followed. Can throw errors otherwise.
|
| 314 |
+
image = ImageProcessingFeatures.grayscaleConversion(image, schema="average")
|
| 315 |
+
|
| 316 |
+
transformedArray = np.int_(255 * (image - np.min(image)) / (np.max(image) - np.min(image)))
|
| 317 |
+
|
| 318 |
+
distances = [pixelOffset]
|
| 319 |
+
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
|
| 320 |
+
|
| 321 |
+
# A bit of a cop out, but this just does it all for us. The metrics you choose to extract from this depend.
|
| 322 |
+
glcm = skf.graycomatrix(image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)
|
| 323 |
+
|
| 324 |
+
# This is a set of features that I personally thought might be useful for the crosswalks - but there are many other extractable features
|
| 325 |
+
contrast = skf.graycoprops(glcm, 'contrast')
|
| 326 |
+
energy = skf.graycoprops(glcm, 'energy')
|
| 327 |
+
homogeneity = skf.graycoprops(glcm, 'homogeneity')
|
| 328 |
+
correlation = skf.graycoprops(glcm, 'correlation')
|
| 329 |
+
|
| 330 |
+
# Single value metrics for the image
|
| 331 |
+
metrics = (np.mean(contrast.flatten()), np.mean(energy.flatten()), np.mean(homogeneity.flatten()), np.mean(correlation.flatten()))
|
| 332 |
+
|
| 333 |
+
if preserveMatrix:
|
| 334 |
+
return metrics, (contrast, energy, homogeneity, correlation)
|
| 335 |
+
|
| 336 |
+
return metrics
|
| 337 |
+
|
| 338 |
+
|
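These routines are meant to be chained: the Canny implementation above stitches the Gaussian blur, Sobel gradients, non-maximum suppression, double thresholding and hysteresis tracking into a single call, and the texture metrics can sit alongside it in a per-tile feature vector. The sketch below is not part of the upload; it assumes the `ImageProcessingFeatures` class defined earlier in this file can be constructed without arguments and precomputes `self.gaussianKernel`, and the tile path is purely illustrative.

```python
import numpy as np
from PIL import Image

processor = ImageProcessingFeatures()   # assumed to precompute self.gaussianKernel in its constructor
texture = TextureFeatures()

tile = np.array(Image.open("crosswalk_tile.png"))        # hypothetical RGB chip from the segmentation stage
gray = np.dot(tile[..., :3], [0.2126, 0.7152, 0.0722])   # same luminosity weighting used above

edges = processor.cannyEdgeDetection(gray)                # boolean edge map after hysteresis tracking
glcmMetrics = texture.grayLevelCoOccurenceMatrix(gray)    # (contrast, energy, homogeneity, correlation)

# A crude per-tile feature vector: edge density plus the four GLCM summary statistics
featureVector = [edges.mean(), *glcmMetrics]
print(featureVector)
```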
SightLinks-Dev-main/DevelopmentTools/experimental_resources/SelfSupervisedFeatureExtraction.py
ADDED
@@ -0,0 +1,137 @@
# Besides the usual feature extraction/ preprocessing methods, or statistical ones - we can also employ ML models to automatically
# build feature extraction layers, using self-supervised learning for example.

# This is an implementation of SimCLR

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np
import random

# Applies random data augmentation techniques to the image, following the SimCLR methodology
# https://arxiv.org/pdf/2002.05709
class SimCLRAugmentationTransform:
    def __init__(self):
        # Following the default augmentations listed out in their paper's Data Augmentation Details
        # If you want to add in more transforms, remember to use transforms.RandomApply
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(size=224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply(
                [transforms.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.25)],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.GaussianBlur(kernel_size=3),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5,), std=(0.5,), inplace=False),
        ])

    # We want to produce two contrasting views of the image
    def __call__(self, x):
        return self.transform(x), self.transform(x)


# See: https://medium.com/data-science/nt-xent-normalized-temperature-scaled-cross-entropy-loss-explained-and-implemented-in-pytorch-cc081f69848
# for a pretty in-depth and clear explanation of how it works. TLDR: Normalise, temperature scale, Cross-Entropy loss.
class NTXentLoss(nn.Module):
    def __init__(self, temperature=0.5):
        super().__init__()
        self.temperature = temperature
        self.cosSimilarity = nn.CosineSimilarity(dim=-1)
        self.CEL = nn.CrossEntropyLoss(reduction="mean")

    def forward(self, z_i, z_j, temperature=None):
        if temperature is None:
            temperature = self.temperature
        x = torch.cat([z_i, z_j], dim=0)
        xcs = self.cosSimilarity(x[None, :, :], x[:, None, :])

        # Naturally, each embedding will have a 1-1 similarity with itself, so will contribute nothing useful,
        # so we zero these contributions.
        xcs[torch.eye(xcs.shape[0], dtype=torch.bool)] = -float('inf')

        # With the two views concatenated back to back, the positive for row i is row i + N (and vice versa)
        N = len(z_i)
        target = torch.cat([torch.arange(N, 2 * N), torch.arange(0, N)]).to(xcs.device)

        return self.CEL(xcs / temperature, target)


# A SimCLR model - to use just override the getModel function to whatever model you want to use,
# and post-training remove the projection layer and add in your classifier head (overwrite the model's fully connected layer)
class SimCLR(nn.Module):
    def __init__(self, encoderModel="MobileNet_V3", outDim=128):
        super().__init__()
        self.encoder = self.getModel(encoderModel)
        self.projectionHead = nn.Sequential(
            nn.Linear(1000, 256),
            nn.LeakyReLU(),
            nn.Linear(256, outDim)
        )

    def getModel(self, modelName):
        modelDictionairy = {
            "ResNet-50": models.resnet50(progress=True),
            "ResNet-18": models.resnet18(progress=True),
            "MobileNet_V3": models.mobilenet_v3_small(progress=True),
        }
        # This should only be evaluated upon the getting of a particular model (calling of a particular index)
        # - shouldn't have to install all of these until used unless there's a bug
        model = modelDictionairy[modelName]
        model.fc = nn.Identity()  # This should be overwritten in your downstream application

        return model

    def forward(self, x):
        features = self.encoder(x)
        featureVector = self.projectionHead(features)
        return featureVector


# Since this is completely dependent on what dataset you use, this serves PURELY as a tutorial for how
# to put your own code in

if __name__ == "__main__":
    batchSize = 128
    transform = SimCLRAugmentationTransform()
    dataset = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True)
    dataloader = DataLoader(dataset, batch_size=batchSize, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimCLR().to(device)
    optimiser = torch.optim.Adam(model.parameters(), lr=2e-3)
    lossFunction = NTXentLoss(temperature=0.5)

    epochs = 2
    lossTracker = []
    print(len(dataloader))
    for epoch in range(epochs):
        eraLoss = 0.0
        model.train()

        for (x_i, x_j), _ in dataloader:
            print(".")
            x_i, x_j = x_i.to(device), x_j.to(device)

            optimiser.zero_grad()

            z_i, z_j = model(x_i), model(x_j)

            loss = lossFunction(z_i, z_j)
            loss.backward()
            optimiser.step()

            eraLoss += loss.item()
        print("\n")

        avgLoss = eraLoss / len(dataloader)
        lossTracker.append(avgLoss)
        print(f"In epoch {epoch+1} of {epochs}, there was a loss of {avgLoss:.4f}")

    print("Completed Training!!!")
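Once pre-training is done, the projection head is thrown away and only the encoder is reused, as the comment on the SimCLR class says. A rough, hypothetical sketch of that hand-over (the head sizes simply mirror the 1000-dimensional encoder output already assumed by the projection head above):

```python
import torch.nn as nn

simclr = SimCLR(encoderModel="MobileNet_V3", outDim=128)
# ... pretrain with NTXentLoss as in the __main__ block above, then discard simclr.projectionHead ...

crosswalkClassifier = nn.Sequential(
    simclr.encoder,        # pretrained feature extractor (1000-d output, as assumed above)
    nn.Linear(1000, 2)     # new head: crosswalk vs background
)

# Optional: freeze the encoder and train only the new head (linear probing)
for param in simclr.encoder.parameters():
    param.requires_grad = False
```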
SightLinks-Dev-main/DevelopmentTools/experimental_resources/quantisedMobileNet.py
ADDED
@@ -0,0 +1,352 @@
# BETA VERSION - NEEDS FURTHER DEVELOPMENT


# Read these to catch up on what is being attempted here
# https://pytorch.org/tutorials/advanced/static_quantization_tutorial.html
# https://pytorch.org/docs/stable/quantization.html#model-preparation-for-eager-mode-static-quantization

# Torch implementation of these models - mine is heavily based on these with some minor adjustments
#
# I've added squeeze and excitation layers to the MobileNetV2, a feature of MobileNetV3, but I did not put in
# NAS (unnecessary since we're not optimising for mobile) or hardswish (because I prefer ReLU/ think it is better)
# https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py#L117
# https://github.com/pytorch/vision/blob/11bf27e37190b320216c349e39b085fb33aefed1/torchvision/models/mobilenetv3.py#L56

# This is an adapted version of MobileNet, somewhere between versions 2/3, as some features of 3 were not required. There are
# also some additions for our particular use case from miscellaneous sources

from torchvision import transforms
import torch
from torch import nn, Tensor
from torch.nn import functional as F
from torch.utils.data import DataLoader, Subset
import ClassUtils

from torch.ao.quantization import QuantStub, DeQuantStub
from torchvision.models.mobilenetv2 import _make_divisible

import time
import random
import os
import matplotlib.pyplot as plt

# Squeeze: summarising global context by pooling feature maps into a single value
# Excitation: learning attention weights for each channel to prioritise the most relevant ones
class SqueezeExcitation(nn.Module):
    def __init__(self, input_channels: int, squeeze_factor: int = 4):
        super().__init__()
        # If channels are a multiple of 8, they're optimised by the hardware
        squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
        self.squeeze = nn.Conv2d(input_channels, squeeze_channels, 1)
        self.relu = nn.ReLU(inplace=True)
        self.unsqueeze = nn.Conv2d(squeeze_channels, input_channels, 1)
        self.quant = nn.quantized.FloatFunctional()

    # Scale returns the feature attention map, how much attention should be paid to each input layer, in range [0, 1]
    # Inplace is used to save memory on operations - it might not be necessary in our case since we aren't using edge devices
    def _scale(self, input: Tensor, inplace: bool = True) -> Tensor:
        # Squeeze
        scale = F.adaptive_avg_pool2d(input, 1)
        scale = self.squeeze(scale)
        # Excite
        scale = self.relu(scale)
        scale = self.unsqueeze(scale)
        return F.hardsigmoid(scale, inplace=inplace)

    def forward(self, input: Tensor) -> Tensor:
        # print(self._scale(input, True))
        # print(input)
        return self.quant.mul(self._scale(input, True), input)


# The basic building block of our convolutional neural network
# - qconfig should automatically insert fakeQuantisation operations during training, so there is no need to manually place them now
class ConvBNReLu(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super().__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            # No point applying a bias (constant additive term) if the next layer is a batch normalisation layer
            nn.BatchNorm2d(out_planes, momentum=0.1),
            nn.ReLU(inplace=True)
        )


# Like typical residual blocks but uses inverse narrow->wide->narrow, with depth-wise convolutions instead of normal,
# to reduce the number of parameters required compared to the usual residual blocks
class InvertedResidual(nn.Module):
    def __init__(self, inpt, oupt, stride, expnd_ratio, kernel_size=3, se_layer=None):
        super().__init__()

        self.stride = stride
        assert stride in [1, 2]

        intermediate_channels = int(round(inpt * expnd_ratio))
        # If the stride != 1, downsampling occurs so cannot be true.
        self.use_residual = (stride == 1) and (inpt == oupt)
        # Squeeze and excitation layer - applied after the dw and pw convolutions, but before the residual
        self.se_layer = se_layer if se_layer else None

        layers = []

        if expnd_ratio != 1:
            # Pointwise convolution to increase the channels
            layers.append(ConvBNReLu(inpt, intermediate_channels, kernel_size=1))

        layers.extend([
            # Depthwise convolution - each channel is convolved on an independent basis
            ConvBNReLu(intermediate_channels, intermediate_channels, stride=stride, groups=intermediate_channels),
            # Point-wise convolution - linear combination to reduce layers back to the expected number
            nn.Conv2d(intermediate_channels, oupt, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oupt, momentum=0.25)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        outpt = self.conv(x)
        if self.se_layer is not None:
            outpt = self.se_layer(outpt)

        if self.use_residual:
            return x + outpt
        else:
            return outpt


# Same as the inverted residual, but replaces addition with a quantization-friendly operation
class QuantizableInvertedResidual(InvertedResidual):
    def __init__(self, inpt, outpt, stride, expnd_ratio, se_layer=None):
        super().__init__(inpt, outpt, stride, expnd_ratio, se_layer=se_layer)
        self.skip_add = nn.quantized.FloatFunctional()

    # Overwrites the forwarding to use a quantization-friendly version of the addition
    def forward(self, x):
        outpt = self.conv(x)
        if self.se_layer is not None:
            outpt = self.se_layer(outpt)

        if self.use_residual:
            return self.skip_add.add(x, outpt)
        else:
            return outpt

# The MobileNetV2 architecture + some features from V3 (squeeze and excitation), but I didn't add NAS since we aren't running this on mobile,
# and I prefer ReLU over hardswish
class MobileNetV2_5(nn.Module):
    def __init__(self, class_num=2, width_mult=1.0, round_nearest=8):
        super().__init__()

        layers = []

        input_channel = 32
        last_channel = 1280

        # Just straight up copying this from the torchvision implementation
        self.residual_params = [
            # expnd_ratio, outpt_channels, num_blocks, stride
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        first_conv_output_channels = _make_divisible(self.residual_params[0][1] * width_mult, round_nearest)
        layers.append(
            ConvBNReLu(3,
                       first_conv_output_channels,
                       kernel_size=3,
                       stride=2,
                       )
        )
        prev_input_channels = first_conv_output_channels

        # Main body of feature extraction
        for expnd, oupt_c, num_blocks, strd in self.residual_params:
            # Output channels must be a multiple of 8 for hardware optimisation
            output_channel = _make_divisible(oupt_c * width_mult, round_nearest)

            for i in range(num_blocks):
                stride = strd if i == 0 else 1
                se_layer = SqueezeExcitation(oupt_c) if i == 0 else None
                layers.append(QuantizableInvertedResidual(prev_input_channels, output_channel, stride, expnd_ratio=expnd, se_layer=se_layer))
                prev_input_channels = output_channel

        self.last_channel = _make_divisible(last_channel * max(width_mult, 1.0), round_nearest)

        # We could put this in the classifier, but I want that to be lightweight so that we could do transfer learning only on the head and
        # the feature extraction part of the model.
        layers.append(
            ConvBNReLu(prev_input_channels, self.last_channel, kernel_size=1)
        )

        self.feature_extraction = nn.Sequential(*layers)
        self.avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(0.125),
            nn.Linear(last_channel, class_num)
        )

        # This bit is also just straight up copied from torch's implementation - I'm not touching it in case it gets messed up
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x: Tensor) -> Tensor:
        x = self.feature_extraction(x)
        x = self.avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

class QuantizableMobileNetV2_5(MobileNetV2_5):
    def __init__(self, class_num=2, width_mult=1.0, round_nearest=8):
        super().__init__(class_num=class_num, width_mult=width_mult, round_nearest=round_nearest)
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.feature_extraction(x)

        # This was for debugging errors in the shape of feature maps as they pass through - not deleting in case it is useful later
        # for idx, layer in enumerate(self.feature_extraction):
        #     x = layer(x)
        #     print(f"Feature extraction layer {idx}, output shape: {x.shape}")

        x = self.avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        x = self.quant(x)
        x = self._forward_impl(x)
        x = self.dequant(x)
        return x


def train_single_epoch(model, loss_fnc, optimiser, data_loader, device):
    model.train()
    running_loss = 0
    running_time = 0.0
    for images, labels in data_loader:
        start_time = time.time()
        print(".", end=" ")

        images, labels = images.to(device), labels.to(device)
        optimiser.zero_grad()  # Clear the gradients accumulated by the previous batch
        preds = model(images)
        loss = loss_fnc(preds, labels)
        loss.backward()
        optimiser.step()

        running_loss += loss.item()
        running_time += time.time() - start_time

        print(f"{(time.time() - start_time):.2f}, {(running_time):.2f}", end=" ")

    print(f"loss of {running_loss}")
    return

def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    os.remove('temp.p')

def adjust_quantisation_engine():
    # Adjust according to what your device supports
    print(torch.backends.quantized.supported_engines)
    torch.backends.quantized.engine = 'qnnpack'

def train_model(model, dataloader, loss_function, optimiser, epoch_number=25, const_save=False, save=True):
    for epoch in range(epoch_number):
        print("IT IS EPOCH", epoch)
        train_single_epoch(model, loss_function, optimiser, dataloader, torch.device('cpu'))

        # Gradually freezes the unrequired observer parameters for quantisation and batch normalisation after a few epochs
        if epoch > 3:
            # Freeze quantizer parameters
            model.apply(torch.ao.quantization.disable_observer)
        if epoch > 2:
            # Freeze batch norm mean and variance estimates
            model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)

        if const_save:
            quantized_model = torch.ao.quantization.convert(model.eval(), inplace=False)
            quantized_model.eval()

            # Saving each intermediary model since they're so small, and this lets us load any of them up for performance comparisons later
            torch.save(quantized_model.state_dict(), "quantStateDict" + str(epoch + 1) + ".pth")

            print(f"the above was Epoch {epoch+1} of {epoch_number} \nThe model has a size of", end=" ")
            print_size_of_model(quantized_model)

        else:
            print(f"the above was Epoch {epoch} of {epoch_number}")

    if save:
        # Convert a copy here so the final save also works when const_save was left off
        quantized_model = torch.ao.quantization.convert(model.eval(), inplace=False)
        torch.save(quantized_model.state_dict(), "full_quantStateDict.pth")

    return model


learning_rate = 1e-3
batch_size = 64
data_size = 2560

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Update to whatever you call your model
modelName = "quantStateDict8.pth"
load = False

model = QuantizableMobileNetV2_5()

# Adjust according to what your device supports
torch.backends.quantized.engine = 'qnnpack'
model.qconfig = torch.ao.quantization.default_qconfig

optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)
torch.ao.quantization.prepare_qat(model, inplace=True)


dataset = ClassUtils.CrosswalkDataset("zebra_annotations/classification_data")
train_loader = DataLoader(
    Subset(dataset, random.sample(list(range(0, int(len(dataset) * 0.95))), data_size)),
    batch_size=batch_size, shuffle=True)
test_loader = DataLoader(
    Subset(dataset, random.sample(list(range(int(len(dataset) * 0.95), len(dataset))), 256)),
    batch_size=batch_size, shuffle=False)

loss_function = nn.BCEWithLogitsLoss()
model_updated = train_model(model, train_loader, loss_function, optimiser, epoch_number=8, const_save=True)

quantized_model = torch.ao.quantization.convert(model_updated.eval(), inplace=True)

if load:
    model_loaded_state_dict = torch.load(modelName)
    quantized_model.load_state_dict(model_loaded_state_dict)


for images, labels in test_loader:
    preds = torch.sigmoid(quantized_model(images))
    for i in range(len(preds)):
        print(preds)
        # plt.imshow(torch.permute(images[i], (1, 2, 0)).detach().numpy())
        # plt.title(f"Prediction: {preds[i]}, Actual: {labels[i][0] == 1}")
        # plt.axis("off")
        # plt.show()
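One subtlety worth spelling out: a state dict saved after `torch.ao.quantization.convert` can only be loaded into a model that has been taken through the same prepare/convert steps, which is what the `load` branch above relies on. A hedged sketch (not part of the upload) of restoring one of the per-epoch checkpoints for inference:

```python
import torch

torch.backends.quantized.engine = 'qnnpack'   # must match the engine used during training

restored = QuantizableMobileNetV2_5()
restored.qconfig = torch.ao.quantization.default_qconfig
torch.ao.quantization.prepare_qat(restored, inplace=True)
restored = torch.ao.quantization.convert(restored.eval(), inplace=False)

restored.load_state_dict(torch.load("quantStateDict8.pth"))   # one of the checkpoints saved by train_model
restored.eval()
```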
SightLinks-Dev-main/LegacyVersion/ClassifierModel.py
ADDED
@@ -0,0 +1,54 @@
import numpy as np
import torch.nn as nn


# A really basic classifier model to detect points of interest (any likely crosswalks) for further investigation
class BasicClassificationModel(nn.Module):
    def __init__(self, image_size):
        super().__init__()
        self.image_size = image_size

        # In channels are 3 - the RGB colours.
        self.first_convolutional_layer = nn.Conv2d(3, 16, 5, padding=0)
        self.second_convolutional_layer = nn.Conv2d(16, 32, 5, padding=0)
        self.third_convolutional_layer = nn.Conv2d(32, 64, 5, padding=0)

        self.fully_connected_layer = self.create_dynamic_output_layer()
        # Actually predicts class probability

        self.pooling_layer = nn.MaxPool2d(5)
        self.activation_layer = nn.LeakyReLU(0.01)

    # It is necessary to be able to take in images of a dynamic size, since we may rescale depending on local regulations
    # for crosswalk size. We can also assume squareness since this takes in the results of the segmentation model.
    def create_dynamic_output_layer(self):
        output_image_size = self.image_size

        for layer in range(2):
            output_image_size = ((output_image_size - 4) // 5)
            # Convolution, then pooling

        output_image_size = output_image_size - 4
        fully_connected_layer = nn.Linear(output_image_size * output_image_size * 64, 2)
        return fully_connected_layer

    def forward(self, x):
        x = self.first_convolutional_layer(x)
        x = self.pooling_layer(x)

        x = self.second_convolutional_layer(x)
        x = self.pooling_layer(x)

        x = self.third_convolutional_layer(x)
        x = self.activation_layer(x)

        x = x.view(x.size(0), -1)
        # Flattens the feature maps to pass into the fully connected layer - from 4D to 2D (batch, lin_tensor)

        class_predictions = self.fully_connected_layer(x)

        return class_predictions


classifierModel = BasicClassificationModel(image_size=416)  # assumed square
print(classifierModel)
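The dynamic layer sizing can be sanity-checked by hand: each 5x5 convolution with no padding removes 4 pixels from the side length, and each MaxPool2d(5) divides it by 5 (floored). For the default image_size of 416 that gives (416 - 4) // 5 = 82, then (82 - 4) // 5 = 15, then 15 - 4 = 11 after the third convolution, so the fully connected layer expects 11 * 11 * 64 = 7744 inputs. A quick shape check (a sketch, not part of the upload):

```python
import torch

model = BasicClassificationModel(image_size=416)
dummy = torch.randn(1, 3, 416, 416)   # one fake RGB tile at the assumed input size
print(model(dummy).shape)              # expected: torch.Size([1, 2])
```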
SightLinks-Dev-main/LegacyVersion/ClassifierTraining.py
ADDED
@@ -0,0 +1,73 @@
import torch

import CrosswalkDataset as Dataset
import ClassifierModel as Model
import Utilities as Utils


def train_model_v0(model_to_train, dataset, epoch_number=25, loss_func=Utils.BasicClassificationLoss,
                   batch_size=16, save=False):
    optimiser = torch.optim.Adam(model_to_train.parameters(), lr=0.001)
    dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=batch_size)

    loss_function = loss_func()

    for epoch in range(epoch_number):
        model_to_train.train()
        running_loss = 0.0

        for images, gt_labels in dataloader:
            optimiser.zero_grad()

            predictions = model_to_train(images)

            batch_loss = loss_function(predictions, gt_labels)
            batch_loss.backward()

            running_loss += batch_loss.item()
            optimiser.step()

        print(f"Epoch [{epoch + 1} of {epoch_number}] finished, with loss {running_loss / len(dataloader)} in "
              f"len {len(dataloader) * batch_size}")

    # Only persist the weights when explicitly asked to
    if save:
        Utils.save_model(model_to_train, optimiser)
    return model_to_train


# Additionally incorporates a learning rate scheduler
def train_model_v1(model_to_train, dataset, epoch_number=10, loss_func=Utils.BasicClassificationLoss,
                   batch_size=16, save=False):
    optimiser = torch.optim.Adam(model_to_train.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimiser, gamma=0.95)
    dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=batch_size)
    loss_function = loss_func()

    for epoch in range(epoch_number):
        model_to_train.train()
        running_loss = 0.0

        for images, gt_labels in dataloader:
            optimiser.zero_grad()

            predictions = model_to_train(images)

            batch_loss = loss_function(predictions, gt_labels)
            batch_loss.backward()

            running_loss += batch_loss.item()
            optimiser.step()

        scheduler.step()

        print(f"Epoch [{epoch + 1} of {epoch_number}] finished, with loss {running_loss / len(dataloader)} in "
              f"len {len(dataloader) * batch_size}")

    # Only persist the weights when explicitly asked to
    if save:
        Utils.save_model(model_to_train, optimiser)
    return model_to_train


model = Model.BasicClassificationModel(image_size=416)
# size should be dynamically obtained later on
crosswalk_dataset = Dataset.CrosswalkDataset("Crosswalk.v7-crosswalk-t3.tensorflow/train/_annotations.csv",
                                             "Crosswalk.v7-crosswalk-t3.tensorflow/train")
model = train_model_v1(model, crosswalk_dataset, save=True)
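train_model_v1 differs from v0 mainly in the ExponentialLR scheduler (plus a shorter default epoch count): the learning rate is multiplied by 0.95 after every epoch, so over the default 10 epochs it decays from 0.001 to roughly 0.00063. A two-line illustration of that decay:

```python
# Learning rate seen at the start of each epoch under ExponentialLR(gamma=0.95), starting from 0.001
for epoch in range(10):
    print(epoch, 0.001 * 0.95 ** epoch)   # 0.001, 0.00095, ... down to ~0.00063 by the final epoch
```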
SightLinks-Dev-main/LegacyVersion/CrosswalkDataset.py
ADDED
@@ -0,0 +1,66 @@
import torch
import pandas as pd
import numpy as np
from PIL import Image


class CrosswalkDataset:
    def __init__(self, annotation_path, image_path, transform=None):
        self.annotations = pd.read_csv(annotation_path)
        self.image_dir = image_path

        self.transform = transform
        # In case later on we want to do normalisation, pre-processing etc. --> Someone else will look at this

        self.unique_labels = sorted(self.annotations['class'].unique())
        # There might be a more efficient method to do this -- come back to
        self.type_mapping = {type_value: 1 + idx for idx, type_value in enumerate(self.unique_labels)}
        # Labels have been converted to numerical class labels mapped by type mapping - for tensor conversion, 0 is
        # the background object/ none type. For the current binary classifier everything other than zebra is 0

        self.image_data = []
        self.labels = []  # One-to-one mapping with bounding boxes by the way

        self.process_annotations()

    def __len__(self):
        return len(self.image_data)

    def __getitem__(self, index):
        image = self.image_data[index]
        class_label = self.labels[index]

        tensor_image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        # From (H, W, C) to (C, H, W) --> This is the format that pytorch uses.
        tensor_label = torch.tensor(class_label, dtype=torch.float32)

        if self.transform:
            tensor_image = self.transform(tensor_image)

        return tensor_image, tensor_label

    def process_annotations(self):
        for filename, group in self.annotations.groupby('filename'):
            completed_image_path = f"{self.image_dir}/{filename}"
            image = Image.open(completed_image_path)
            image_array = np.array(image)
            entity_annotations = [1, 0]

            for _, row in group.iterrows():
                # We can add additional classes in here - come back to later
                if row['class'] == "ZebraStyle":
                    # numerical_class = self.type_mapping[row['class']]
                    entity_annotations = [0, 1]
                    # print([numerical_class, (row['xmin'], row['ymin'], row['xmax'], row['ymax'])])

                else:
                    pass
                    # Classified as a background object

            self.image_data.append(image_array)
            self.labels.append(entity_annotations)


crosswalk_dataset = CrosswalkDataset("Crosswalk.v7-crosswalk-t3.tensorflow/train/_annotations.csv",
                                     "Crosswalk.v7-crosswalk-t3.tensorflow/train")
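The dataset emits two-element labels: [1, 0] for background tiles and [0, 1] for ZebraStyle crossings, converted to float tensors in __getitem__ so they feed straight into the loss wrapper in Utilities.py. A small usage sketch (the paths are placeholders, and it assumes all exported tiles share one resolution so they can be batched):

```python
from torch.utils.data import DataLoader

ds = CrosswalkDataset("path/to/_annotations.csv", "path/to/images")   # hypothetical paths
loader = DataLoader(ds, batch_size=4, shuffle=True)

images, labels = next(iter(loader))
print(images.shape)   # (4, 3, H, W) float32, channels-first
print(labels)         # rows of [1., 0.] (background) or [0., 1.] (zebra crossing)
```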
SightLinks-Dev-main/LegacyVersion/EvaluateModel.py
ADDED
@@ -0,0 +1,38 @@
import matplotlib.pyplot as plt
import torch
import Utilities as Utils
import ClassifierModel as classifierModel   # module name is case-sensitive on most systems
import CrosswalkDataset as Dataset


model = Utils.load_model_for_eval('trainedClassifier_weights.pth', classifierModel.BasicClassificationModel)
dataset = Dataset.CrosswalkDataset("Crosswalk.v7-crosswalk-t3.tensorflow/test/_annotations.csv",
                                   "Crosswalk.v7-crosswalk-t3.tensorflow/test")


with torch.no_grad():
    loss = 0.0
    batch_size = 3
    dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=batch_size)
    loss_function = Utils.BasicClassificationLoss()
    count, notCount = 0, 0
    for images, gt_labels in dataloader:
        predictions = model(images)
        softmax_probabilities = Utils.softmax(predictions)
        for i in range(len(images)):
            plt.imshow(images[i].permute(1, 2, 0).numpy() / 255.0)
            classif = False
            if (gt_labels[i][1] > gt_labels[i][0] and softmax_probabilities[i][1] > softmax_probabilities[i][0]) or (gt_labels[i][1] <= gt_labels[i][0] and softmax_probabilities[i][1] <= softmax_probabilities[i][0]):
                classif = True
                count += 1
            else:
                notCount += 1
            plt.title(str(softmax_probabilities[i]) + " " + str(gt_labels[i]) + str(classif))
            plt.show()
            print(softmax_probabilities[i])

        batch_loss = loss_function(predictions, gt_labels)
        loss += batch_loss

print("Loss is: ", loss / (len(dataloader) * batch_size))
print(count, notCount)
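The script prints the raw tallies but never the headline number; the implied accuracy over the test split is simply the correct count over the total, e.g.:

```python
accuracy = count / (count + notCount)
print(f"Accuracy: {accuracy:.2%} over {count + notCount} test tiles")
```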
SightLinks-Dev-main/LegacyVersion/Utilities.py
ADDED
@@ -0,0 +1,34 @@
# This just contains loss functions and other things required for the classifier training process
import numpy as np
import torch
import torch.nn as nn


class BasicClassificationLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.classification_loss = nn.CrossEntropyLoss()

    def forward(self, pred_labels, gt_labels):
        return self.classification_loss(pred_labels, gt_labels)


def save_model(trained_model, optimiser_used):
    torch.save(trained_model, 'trainedClassifier.pth')
    print(",")
    torch.save(trained_model.state_dict(), 'trainedClassifier_weights.pth')
    torch.save(optimiser_used, 'optimiserUsed.pth')


def load_model_for_eval(file_path, model_type):
    model_template = model_type(416)
    model_template.load_state_dict(torch.load(file_path, weights_only=True))
    model_template.eval()
    return model_template


def softmax(unprocessed_logits):
    logits = np.array(unprocessed_logits)
    exponentials = np.exp(logits)
    # Normalise each row independently so this also works on a whole batch of logits
    softmax_arr = exponentials / exponentials.sum(axis=-1, keepdims=True)
    return softmax_arr
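For a single pair of logits the normalisation works out as follows (with the row-wise sum above, a whole batch is handled the same way, one row at a time):

```python
import numpy as np

logits = np.array([[2.0, 0.5]])
exponentials = np.exp(logits)                                   # [[7.389, 1.649]]
print(exponentials / exponentials.sum(axis=-1, keepdims=True))  # [[0.818, 0.182]]
```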
SightLinks-Dev-main/README.md
ADDED
@@ -0,0 +1,63 @@
# SightLinks Development Repository

Welcome to the SightLinks Development Repository! We hope you can use these tools to build accessible systems and make the world a more welcoming place. This repository contains various tools and resources designed to facilitate the development of the SightLinks project, focusing on data generation, annotation, model training, feature extraction and much more. These are the tools used to develop the SightLinks system, as well as some additional Beta features that can bring the project even further.

## Project Overview

SightLinks is an initiative aimed at enhancing accessibility mapping through automated analysis of satellite imagery. By accurately detecting features relevant to wheelchair users and individuals with mobility needs, SightLinks contributes to the development of more inclusive navigation solutions. The system achieves high reliability in feature detection while maintaining strict safety standards to prevent false positives. ([students.cs.ucl.ac.uk](https://students.cs.ucl.ac.uk/2024/group15/index.html))

## Repository Structure

- **DevelopmentTools/**: This directory includes tools and scripts used for data generation, annotation, model training, and feature pre-processing. These resources are essential for processing satellite imagery and training machine learning models to detect accessibility features, though their use is by no means limited to this project!

- **LegacyVersion/**: This directory contains previous versions of the tools and scripts, preserved for reference and potential reuse.

## Getting Started

To begin using the tools in this repository:

1. **Clone the Repository**:
   ```bash
   git clone https://github.com/UCL-SightLinks/SightLinks-Dev.git
   ```
2. **Navigate to the Development Tools Directory**:
   ```bash
   cd SightLinks-Dev/DevelopmentTools
   ```
3. **Install Dependencies**: Ensure you have Python installed, then install the necessary packages:
   ```bash
   pip install -r requirements.txt
   ```
4. **Run the Tools**: Follow the instructions provided in each file's documentation to generate data, annotate images, or train models.

## Contributing

We welcome contributions to enhance the functionality and efficiency of the tools in this project; in fact, we encourage it. To contribute:

1. Fork the repository.
2. Create a new branch for your feature or bug fix:
   ```bash
   git checkout -b feature-name
   ```
3. Commit your changes:
   ```bash
   git commit -m "Description of feature or fix"
   ```
4. Push to your branch:
   ```bash
   git push origin feature-name
   ```
5. Open a pull request detailing your changes. (We will most likely accept it; there are a lot of improvements to be made!)

## License

This project is licensed under the MIT License and is owned by the UCL Computer Science department.

## Contact

For questions or further information, please contact the SightLinks development team at [zcabkde@ucl.ac.uk], or the university's Computer Science department, which has taken over this project ([UCL CS Department Staff Email List](https://www.ucl.ac.uk/computer-science/people/computer-science-professional-services-staff)).

---

*Note: For a comprehensive overview of the SightLinks project, including key features, functionalities, and team members, please visit our [project website](https://students.cs.ucl.ac.uk/2024/group15/index.html)!* ([students.cs.ucl.ac.uk](https://students.cs.ucl.ac.uk/2024/group15/index.html))
SightLinks-Dev-main/requirements.txt
ADDED
@@ -0,0 +1,14 @@
overpy
geopy
requests
torch
torchvision
matplotlib
numpy
pillow
shapely
pyyaml
scipy
scikit-image
opencv-python
pandas