File size: 5,756 Bytes

4ec6f12

# https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34

import cv2, os, torch, re
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
import numpy as np
from model import MakiAlexNet
from tqdm import tqdm

# from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
# Percentile threshold; only referenced by the currently disabled heat-map
# thresholding code further down in this file.
TOP_ACCURACY_PERCENTILE = 10

TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
MODEL_PARAMS = "alexnet_cognitive.pth"
GIF_STORE = "dataset/gifs/"
TRAIN_STORE = "dataset/root/train/"

model = MakiAlexNet()
# Deserialize the weights onto the CPU first: a checkpoint that was saved from a
# CUDA device cannot be loaded on a machine without a GPU unless map_location is
# given. The model is moved to the GPU right below when one is available.
model.load_state_dict(torch.load(MODEL_PARAMS, map_location="cpu"))
model.eval()

# Make model run on cuda if available.
if torch.cuda.is_available():
    model = model.cuda()
    print("Running on cuda")


# Debug output: list the model's attributes ...
print(dir(model))

# ... and the names of all of its (sub)modules.
for name, module in model.named_modules():
    # Print the layer name
    print(name)


def extract_file_paths(filename):
    """Split a frame filename into frame number, left/right prefix and video number.

    The name must contain ``<left|right><video>_frame_<frame>``, for example
    ``left1_frame_10.jpg``. The pattern is searched for anywhere in the string,
    so a full path works as well.

    Args:
        filename: File name (or path) to parse.

    Returns:
        A ``(frame_no, frame_name, video_no)`` tuple of strings. For
        ``left1_frame_10.jpg`` this is ``("10", "left", "1")``.

    Raises:
        ValueError: If ``filename`` does not contain the expected pattern.
    """
    # Pattern built with aid from https://regex101.com/.
    extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
    result = re.search(extractor_reg, filename)
    if result is None:
        # Without this check a stray file (e.g. a hidden system file in the image
        # directory) would fail with an unhelpful AttributeError on ``None``.
        raise ValueError(
            f"Filename {filename!r} does not match '<left|right><video>_frame_<frame>'"
        )
    frame_name, video_no, _, frame_no = result.groups()
    return frame_no, frame_name, video_no


def create_mp4_from_frames(file_name, frames):
    """Write the given frame images to ``<cwd>/dataset/gifs/<file_name>.mp4``.

    Frames are written in sorted path order (plain string sort) at a fixed
    20 frames per second, so the clip lasts ``len(frames) / 20`` seconds. The
    video size is taken from the first frame in sorted order.

    Args:
        file_name: Base name (without extension) of the output video.
        frames: Non-empty collection of image file paths readable by ``cv2.imread``.

    Raises:
        ValueError: If ``frames`` is empty or one of the images cannot be read.
        RuntimeError: If the output video cannot be opened for writing.
    """
    if not frames:
        raise ValueError("No frames given to create a video from.")

    # Sort once and reuse the result for both the log line and the write loop.
    ordered_frames = sorted(frames)
    print("Sorted frames: ", ordered_frames)

    first_image = cv2.imread(ordered_frames[0])
    if first_image is None:  # cv2.imread signals failure by returning None
        raise ValueError(f"Could not read frame image: {ordered_frames[0]}")
    height, width, _ = first_image.shape

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = 20  # Adjust the frames per second (FPS) as needed
    output_dir = os.path.join(os.getcwd(), "dataset", "gifs")
    # The writer does not create missing directories; it would just fail to open.
    os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, f"{file_name}.mp4")

    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    if not video.isOpened():
        raise RuntimeError(f"Could not open video file for writing: {video_path}")

    try:
        for frame_path in ordered_frames:
            image = cv2.imread(frame_path)
            if image is None:
                raise ValueError(f"Could not read frame image: {frame_path}")
            # cv2.imread yields BGR, which is what VideoWriter expects as well.
            # Converting to RGB here would swap red and blue in the output video.
            video.write(image)
    finally:
        # Release the VideoWriter even if a frame failed, so the file is finalized.
        video.release()



# Script body: for every input image, run the model, fetch the recorded
# 'Conv2d' layer output and show each of its channels as a heat map on top of
# the image. The grouping of rendered frames into one MP4 per video is prepared
# here, but the code that fills `selected_frames` is still disabled (see the
# commented-out section inside the loop).
current_video_name = None
selected_frames = []  # Paths of rendered frames collected for the current video.
# Currently limited to a single image; to process the whole training folder use:
#   tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images")
for image_filename in ["left1_frame_5.jpg"]:

    frame_no, frame_name, video_no = extract_file_paths(image_filename)
    obtained_video_name = video_no+"vid"+frame_name
    if current_video_name != obtained_video_name:
        # We have a new video sequence: write out the frames collected for the
        # previous one (if any) before starting a fresh collection.
        if selected_frames and current_video_name:
            create_mp4_from_frames(current_video_name, selected_frames)
        # Always switch to the new name, also for the very first image. Doing this
        # only when frames were pending left the name at None and discarded the
        # first frame collected.
        selected_frames = []
        current_video_name = obtained_video_name

    # With the number and name of the file paths, we can then determine which should be part of the specific GIF file.
    # f"frame_no,fileno,video_no.gif"

    image_path = os.path.join(TRAIN_STORE, image_filename)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:  # cv2.imread returns None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Keep the uint8 RGB array around: it is what gets displayed below.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Float32 tensor with a leading batch dimension, then channels-last ->
    # channels-first. The einsum only permutes axes; its subscript letters are
    # plain labels.
    # NOTE(review): pixel values are passed to the model unnormalised (0-255) -
    # confirm this matches the preprocessing used during training.
    img = torch.unsqueeze(torch.tensor(rgb_image.astype(np.float32)), 0)
    X = torch.einsum("BWHC->BCWH", img)
    if torch.cuda.is_available():
        X = X.cuda()

    # The forward pass is needed for its side effect of filling model.layer_outputs.
    output = model(X)
    # print(output)
    # print(model.layer_outputs)
    conv = model.layer_outputs['Conv2d']

    # Back to channels-last so that conv[0, :, :, i] is the 2-D map of channel i.
    conv = torch.einsum("BCWH->BWHC", conv).cpu().detach().numpy()
    # print(conv.shape)  # torch.Size([1, 256, 12, 12])
    # conv = conv.squeeze(0)
    # print(conv.shape)  # torch.Size([256, 12, 12])

    # Zoom factors that stretch one feature map to the size of the input image.
    # Derived from the actual shapes instead of the former hard-coded 224 / 12,
    # so the overlay also lines up for other image or feature-map sizes.
    image_height, image_width = rgb_image.shape[:2]
    map_height, map_width = conv.shape[1:3]
    zoom_factors = (image_height / map_height, image_width / map_width)

    # One subplot per channel on a square grid (16 x 16 for 256 channels).
    n_channels = conv.shape[-1]
    grid_size = int(np.ceil(np.sqrt(n_channels)))

    plt.figure(figsize=(16, 16))
    total_mat = None  # only used by the disabled accumulation code below
    for i in range(n_channels):
        plt.subplot(grid_size, grid_size, i + 1)
        # Show the uint8 image: matplotlib clips float RGB data to [0, 1], so the
        # 0-255 float tensor would be rendered as an almost completely white image.
        plt.imshow(rgb_image)
        plt.imshow(zoom(conv[0, :, :, i], zoom=zoom_factors), cmap='jet', alpha=0.3)
    plt.show()
    # wait for user to press a key

    #     mat = zoom(conv[0, :, :, i], zoom=zoom_factors)
    #     threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
    #     # The Lower threshold is to zero, the more specific the look is shown.
    #
    #     mask = mat > threshold
    #     # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
    #
    #     # Rescale remaining values (adjust new_range if needed)
    #     new_range = 1  # Adjust based on your desired final range
    #     filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
    #
    #     # I just add all the maps together, which is really noisy.
    #     if type(total_mat) != type(None):
    #         total_mat += filter_map
    #     else:
    #         total_mat = filter_map
    #
    # # Normalize based on largest value,
    # # Store this image in a collection, in which a GIF will be made, that lasts at least 2 seconds.
    # total_mat = total_mat / abs(np.max(total_mat))
    # #
    # image = img.squeeze(0)  # .detach().numpy().astype(np.float32)
    #
    #
    # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
    # plt.imshow(total_mat, cmap='jet', alpha=0.3)
    #
    # # selected_frames.append()
    # filename = frame_name+frame_no+video_no+".jpg"
    # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
    # plt.savefig(file_path)
    # selected_frames.append(file_path)

# The last video sequence is never followed by a name change, so write out its
# frames here (a no-op while nothing is appended to `selected_frames`).
if selected_frames and current_video_name:
    create_mp4_from_frames(current_video_name, selected_frames)

# Stop here. `exit()` is a helper injected by the `site` module for interactive
# use; raising SystemExit is the equivalent that always exists.
raise SystemExit


# plt.figure(figsize=(16, 16))
# for i in range(36):
#     plt.subplot(6, 6, i + 1)
#     plt.imshow(cv2.imread(TEST_IMAGE))
#     plt.imshow(zoom(conv[0, :,:,i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
#
# plt.show()