| # https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34 | |
| import cv2, os, torch, re | |
| import matplotlib.pyplot as plt | |
| from scipy.ndimage import zoom | |
| import numpy as np | |
| from model import MakiAlexNet | |
| from tqdm import tqdm | |
| # from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions | |
# Percentile used by the (currently disabled) activation-map thresholding step.
TOP_ACCURACY_PERCENTILE = 10
# Single sample image; only referenced from commented-out plotting code.
TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
# Path of the serialized state dict loaded into MakiAlexNet below.
MODEL_PARAMS = "alexnet_cognitive.pth"
# Output directory for generated videos (not referenced by the code in this file).
GIF_STORE = "dataset/gifs/"
# Directory the input frames are read from.
TRAIN_STORE = "dataset/root/train/"

model = MakiAlexNet()
# Deserialize onto the CPU first: without ``map_location`` a checkpoint that
# was saved from a GPU cannot be loaded on a CPU-only machine, even though the
# script is meant to fall back to the CPU when CUDA is unavailable.
model.load_state_dict(torch.load(MODEL_PARAMS, map_location="cpu"))
model.eval()  # Inference mode (affects layers such as dropout / batch norm).

# Make the model run on CUDA if available.
if torch.cuda.is_available():
    model = model.cuda()
    print("Running on cuda")

# Diagnostics: dump the model's attributes and the names of all sub-modules.
print(dir(model))
for name, _ in model.named_modules():
    # Print the layer name.
    print(name)
# Matches names such as "left1_frame_10": a "left"/"right" prefix, the video
# number, the literal "_frame_" and the frame number. Compiled once because the
# function is called for every image.
_FRAME_NAME_PATTERN = re.compile(r"(left|right)([0-9]+)(_frame_)([0-9]+)")


def extract_file_paths(filename):
    """Parse the components of a frame file name.

    The name must contain ``<left|right><video_no>_frame_<frame_no>``, e.g.
    ``"left1_frame_5.jpg"``. Anything before or after that pattern (directories,
    file extension) is ignored.

    Args:
        filename: File name or path of a frame image.

    Returns:
        A tuple ``(frame_no, frame_name, video_no)`` of strings, where
        ``frame_name`` is the ``"left"``/``"right"`` prefix. For
        ``"left1_frame_5.jpg"`` this is ``("5", "left", "1")``.

    Raises:
        ValueError: If ``filename`` does not contain the expected pattern.
    """
    match = _FRAME_NAME_PATTERN.search(filename)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on ``None``.
        raise ValueError(
            f"File name {filename!r} does not match "
            "'<left|right><video_no>_frame_<frame_no>'"
        )
    frame_name, video_no, _, frame_no = match.groups()
    return frame_no, frame_name, video_no
def _natural_sort_key(path):
    """Return a sort key that orders digit runs in *path* numerically."""
    # re.split with a capturing group alternates text / digit chunks, so the
    # odd indices are always the digit runs.
    chunks = re.split(r"([0-9]+)", path)
    return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]


def create_mp4_from_frames(file_name, frames, fps=20):
    """Write the given image files into ``dataset/gifs/<file_name>.mp4``.

    Frames are written in natural (numeric-aware) order of their paths, so
    ``..._2.jpg`` comes before ``..._10.jpg``. The video size is taken from the
    first frame in that order. The resulting duration is ``len(frames) / fps``
    seconds.

    Args:
        file_name: Base name (without extension) of the video to create.
        frames: Sequence of paths to the image files to include.
        fps: Frames per second of the resulting video.

    Raises:
        ValueError: If ``frames`` is empty.
        OSError: If a frame cannot be read or the video file cannot be opened
            for writing.
    """
    if not frames:
        raise ValueError("No frames were given; cannot create a video.")

    ordered_frames = sorted(frames, key=_natural_sort_key)
    print("Sorted frames: ", ordered_frames)

    first_image = cv2.imread(ordered_frames[0])
    if first_image is None:
        raise OSError(f"Could not read frame: {ordered_frames[0]}")
    height, width = first_image.shape[:2]

    output_dir = os.path.join(os.getcwd(), "dataset", "gifs")
    # VideoWriter fails silently when the target directory does not exist.
    os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, f"{file_name}.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise OSError(f"Could not open video file for writing: {video_path}")
        for frame_path in ordered_frames:
            image = cv2.imread(frame_path)
            if image is None:
                raise OSError(f"Could not read frame: {frame_path}")
            # cv2.imread yields BGR and VideoWriter.write expects BGR, so the
            # frame is written as-is; converting to RGB here would swap the
            # red and blue channels in the video.
            video.write(image)
    finally:
        # Always release the VideoWriter so the file is finalized / unlocked.
        video.release()
current_video_name = None
selected_frames = []  # Paths of rendered frames for the video currently being assembled.

# NOTE: restricted to a single frame while debugging. To process the whole
# training set, iterate over
#     tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images")
for image_filename in ["left1_frame_5.jpg"]:
    frame_no, frame_name, video_no = extract_file_paths(image_filename)
    obtained_video_name = video_no + "vid" + frame_name

    if current_video_name != obtained_video_name:
        # A new video sequence starts: write out the frames collected for the
        # previous sequence (if any), then start collecting afresh.
        if selected_frames and current_video_name:
            create_mp4_from_frames(current_video_name, selected_frames)
        selected_frames = []
        current_video_name = obtained_video_name

    image_path = os.path.join(TRAIN_STORE, image_filename)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")
    # Keep the uint8 RGB array around for display: matplotlib clips float RGB
    # data to [0, 1], so showing the float32 0-255 tensor renders washed out.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Float32 tensor with an added batch dimension, then channels moved in
    # front of the two spatial axes for the model.
    img = torch.unsqueeze(torch.tensor(rgb_image.astype(np.float32)), 0)
    X = torch.einsum("BWHC->BCWH", img)
    if torch.cuda.is_available():
        X = X.cuda()

    # Only activations are needed, so skip building the autograd graph.
    with torch.no_grad():
        model(X)

    # NOTE(review): ``layer_outputs`` is presumably filled during the forward
    # pass by MakiAlexNet (e.g. via hooks) -- confirm in model.py. The original
    # author recorded its shape here as [1, 256, 12, 12].
    conv = model.layer_outputs['Conv2d']
    conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()

    # Per-axis factors that stretch one activation map to the image size
    # (224 / 12 on both axes for a 224x224 input and 12x12 maps).
    zoom_factors = (
        rgb_image.shape[0] / conv.shape[1],
        rgb_image.shape[1] / conv.shape[2],
    )
    n_channels = conv.shape[-1]
    # Smallest square grid that fits all channels (16x16 for 256 channels).
    grid_side = int(np.ceil(np.sqrt(n_channels)))

    plt.figure(figsize=(16, 16))
    for i in range(n_channels):
        plt.subplot(grid_side, grid_side, i + 1)
        plt.imshow(rgb_image)
        plt.imshow(zoom(conv[0, :, :, i], zoom=zoom_factors), cmap='jet', alpha=0.3)
    plt.show()  # Blocks until the figure window is closed.

    # TODO: aggregate the per-channel maps into one heat-map frame, save it and
    # collect it in ``selected_frames`` for the video. Draft kept for reference
    # (the first part belongs inside the channel loop above):
    #
    #     total_mat = None
    #     for i in range(n_channels):
    #         mat = zoom(conv[0, :, :, i], zoom=zoom_factors)
    #         threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
    #         # The lower the threshold is to zero, the more specific the look is shown.
    #         mask = mat > threshold
    #         # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
    #
    #         # Rescale remaining values (adjust new_range if needed).
    #         new_range = 1  # Adjust based on the desired final range.
    #         filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
    #
    #         # Adding all the maps together is really noisy.
    #         if total_mat is not None:
    #             total_mat += filter_map
    #         else:
    #             total_mat = filter_map
    #
    #     # Normalize based on the largest value; store this image in a
    #     # collection from which a GIF lasting at least 2 seconds will be made.
    #     total_mat = total_mat / abs(np.max(total_mat))
    #
    #     plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
    #     plt.imshow(total_mat, cmap='jet', alpha=0.3)
    #
    #     filename = frame_name + frame_no + video_no + ".jpg"
    #     file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
    #     plt.savefig(file_path)
    #     selected_frames.append(file_path)

    # Debug stop after the first image. ``raise SystemExit`` is equivalent to
    # ``exit()`` but does not depend on the ``site`` module's interactive helper.
    raise SystemExit
| # plt.figure(figsize=(16, 16)) | |
| # for i in range(36): | |
| # plt.subplot(6, 6, i + 1) | |
| # plt.imshow(cv2.imread(TEST_IMAGE)) | |
| # plt.imshow(zoom(conv[0, :,:,i], zoom=(scale, scale)), cmap='jet', alpha=0.3) | |
| # | |
| # plt.show() | |