Spaces:

GabrielML
/

Explain-Animal-CNN

Sleeping

App Files Files Community

GabrielML committed on Oct 5, 2023

Commit

b426c59

1 Parent(s): 6b09016

Add new classes and features

Browse files

Files changed (25) hide show

app.py +109 -57
src/Nets.py +1 -38
src/Roboto-Regular.ttf +0 -0
src/cache/val_df.csv +0 -0
src/examples/{false_predicted/squirrel.jpg → false/bee.jpg} +2 -2
src/examples/{false_predicted/chimpanzee.jpg → false/coyote.jpg} +2 -2
src/examples/{true_predicted/cat.jpg → false/donkey.jpg} +2 -2
src/examples/false/goat.jpg +3 -0
src/examples/false/hornbill.jpg +3 -0
src/examples/false_predicted/starfish.jpg +0 -3
src/examples/true/dolphin.jpg +3 -0
src/examples/true/dragonfly.jpg +3 -0
src/examples/{false_predicted → true}/koala.jpg +2 -2
src/examples/{false_predicted → true}/sheep.jpg +2 -2
src/examples/true/squid.jpg +3 -0
src/examples/true_predicted/cockroach.jpg +0 -3
src/examples/true_predicted/flamingo.jpg +0 -3
src/examples/true_predicted/gorilla.jpg +0 -3
src/examples/true_predicted/grasshopper.jpg +0 -3
src/gradio_blocks.py +2 -2
src/header.md +2 -2
src/results/gradcam_video.mp4 +2 -2
src/results/infer_image.png +2 -2
src/results/models/best_model.pth +2 -2
src/util.py +1 -75

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import copy
 import os
 import sys
@@ -16,31 +17,43 @@ import torch
 from deep_translator import GoogleTranslator
 from gradio_blocks import build_video_to_camvideo
 from Nets import CustomResNet18
-from PIL import Image
 from pytorch_grad_cam import GradCAM, HiResCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 from pytorch_grad_cam.utils.image import show_cam_on_image
 from tqdm import tqdm
-import util
-from util import transform, CustomImageCache, imageCacheWrapper
-util.ImageCache = CustomImageCache(60, False)
 ffmpeg_path = shutil.which('ffmpeg')
 mediapy.set_ffmpeg(ffmpeg_path)
 IMAGE_PATH = os.path.join(os.getcwd(), 'src/examples')
 IMAGES_PER_ROW = 5
-MAXIMAL_FRAMES = 1000
-BATCHES_TO_PROCESS = 10
 OUTPUT_FPS = 10
-MAX_OUT_FRAMES = 60
-MODEL = CustomResNet18(90).eval()
 MODEL.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))
 CAM_METHODS = {
     "GradCAM": GradCAM,
     "GradCAM++": GradCAMPlusPlus,
@@ -87,16 +100,21 @@ def get_class_name(idx):
 def get_class_idx(name):
     return C_NAME_TO_NUM[name]
-@lru_cache(maxsize=100)
-def get_translated(to_translate):
-    return GoogleTranslator(source="en", target="de").translate(to_translate)
-for idx in range(90): get_translated(get_class_name(idx))
-@imageCacheWrapper
-def infer_image(image):
-    if isinstance(image, dict):
-        # Its the image and a mask as pillow both -> Combine them to one image
-        image = Image.blend(image["image"], image["mask"], alpha=0.5)
     image.save('src/results/infer_image.png')
     image = transform(image)
     image = image.unsqueeze(0)
@@ -105,11 +123,13 @@ def infer_image(image):
     distribution = torch.nn.functional.softmax(output, dim=1)
     ret = defaultdict(float)
     for idx, prob in enumerate(distribution[0]):
-        animal = f'{get_class_name(idx)} ({get_translated(get_class_name(idx))})'
         ret[animal] = prob.item()
     return ret
-def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
     if image is None:
         raise gr.Error("Please upload an image.")
@@ -123,8 +143,8 @@ def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False,
         colormap = CV2_COLORMAPS[colormap]
     image_width, image_height = image.size
-    if image_width > 4000 or image_height > 4000:
-        raise gr.Error("The image is too big. The maximal size is 4000x4000.")
     MODEL.eval()
@@ -135,6 +155,8 @@ def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False,
     with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
         grayscale_cam = cam(input_tensor=image_tensor, targets=targets, aug_smooth=use_aug_smooth, eigen_smooth=use_eigen_smooth)
     grayscale_cam = grayscale_cam[0, :]
     grayscale_cam = cv2.resize(grayscale_cam, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
@@ -146,10 +168,25 @@ def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False,
     else:
         image = image / 255
         visualization = show_cam_on_image(image, grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
-    return Image.fromarray(visualization)
 def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
     global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
     if colormap not in CV2_COLORMAPS.keys():
         raise gr.Error(f"Colormap {colormap} not found in {list(CV2_COLORMAPS.keys())}.")
     else:
@@ -159,8 +196,8 @@ def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=Fal
     if OUTPUT_FPS == -1: OUTPUT_FPS = fps
     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    if width > 3000 or height > 3000:
-        raise gr.Error("The video is too big. The maximal size is 3000x3000.")
     print(f'FPS: {fps}, Width: {width}, Height: {height}')
     frames = list()
@@ -213,21 +250,21 @@ def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=Fal
 def load_examples():
     folder_name_to_header = {
         "AI_Generated": "AI Generated Images",
-        "true_predicted": "True Predicted Images (Validation Set)",
-        "false_predicted": "False Predicted Images (Validation Set)",
         "others": "Other interesting images from the internet"
     }
     images_description = {
         "AI_Generated": "These images are generated by Dalle3 and Stable Diffusion. All of them are not real images and because of that it is interesting to see how the model predicts them.",
-        "true_predicted": "These images are from the validation set and the model predicted them correctly.",
-        "false_predicted": "These images are from the validation set and the model predicted them incorrectly. Maybe you can see why the model predicted them incorrectly using the GradCAM visualization. :)",
         "others": "These images are from the internet and are not part of the validation set. They are interesting because most of them show different animals."
     }
     loaded_images = defaultdict(list)
-    for image_type in ["AI_Generated", "true_predicted", "false_predicted", "others"]:
     # for image_type in os.listdir(IMAGE_PATH):
         full_path = os.path.join(IMAGE_PATH, image_type).replace('\\', '/').replace('//', '/')
         gr.Markdown(f'## {folder_name_to_header[image_type]}')
@@ -239,7 +276,7 @@ def load_examples():
                 for j in range(IMAGES_PER_ROW):
                     if i * IMAGES_PER_ROW + j >= len(images_to_load): break
                     image = images_to_load[i * IMAGES_PER_ROW + j]
-                    name = f"{image.split('.')[0]} ({get_translated(image.split('.')[0])})"
                     image = Image.open(os.path.join(full_path, image))
                     # scale so that the longest side is 600px
                     scale = 600 / max(image.size)
@@ -273,7 +310,15 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
         with gr.Column(scale=1):
             pil_logo = Image.open('animals.png')
             logo = gr.Image(value=pil_logo, scale=2, interactive=False, show_download_button=False, show_label=False, container=False, elem_id="logo")
     # -------------------------------------------
     #                INPUT IMAGE
     # -------------------------------------------
@@ -282,7 +327,6 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
             user_image = gr.Image(
                 type="pil",
                 label="Upload Your Own Image",
-                tool="sketch",
                 interactive=True,
             )
@@ -301,8 +345,9 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
                     info="Top three predicted classes and their confidences.",
                     scale=5,
                 )
-                predict_mode_button = gr.Button(value="Predict Animal", label="Predict", info="Click to make a prediction.", scale=1)
-                predict_mode_button.click(fn=infer_image, inputs=[user_image], outputs=output, queue=True)
         # -------------------------------------------
         #                EXPLAIN
@@ -348,20 +393,28 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
                         scale=2,
                         info=_info
                     )
-                    _info = """
-                        Here you can choose the animal to "explain". If you choose "Predicted Class" the GradCAM visualization will be based on the predicted class.
-                        If you choose a specific class the GradCAM visualization will be based on this class.
-                        For example if you have an image with a dog and a cat, you can select either Cat or Dog and see if the model can focus on the correct animal.
-                    """
-                    animal_to_explain = gr.Dropdown(
-                        choices=["Predicted Class"] + ALL_CLASSES,
-                        label="Animal",
-                        value="Predicted Class",
-                        interactive=True,
-                        scale=2,
-                        info=_info
-                    )
                     with gr.Row():
                         _info = """
@@ -371,7 +424,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
                         colormap = gr.Dropdown(
                             choices=list(CV2_COLORMAPS.keys()),
                             label="Colormap",
-                            value="Jet",
                             interactive=True,
                             scale=2,
                             info=_info
@@ -410,15 +463,16 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
                 with gr.Column():
                     output_cam = gr.Image(
                         type="pil",
                         label="GradCAM",
                         info="GradCAM visualization",
-                        scale=5,
                     )
-                    gradcam_mode_button = gr.Button(value="Show GradCAM", label="GradCAM", info="Click to make a prediction.", scale=1)
-                    gradcam_mode_button.click(fn=gradcam, inputs=[user_image, colormap, use_eigen_smooth, use_aug_smooth, bw_highlight, alpha, cam_method, layer, animal_to_explain], outputs=output_cam, queue=True)
         # -------------------------------------------
         #                Video CAM
@@ -434,11 +488,9 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped', css=css) as demo:
             loaded_images = load_examples()
             for k in loaded_images.keys():
                 for image in loaded_images[k]:
-                    image.select(fn=lambda x: x, inputs=[image], outputs=[user_image])
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()

+from concurrent.futures import ThreadPoolExecutor
 import copy
 import os
 import sys
 from deep_translator import GoogleTranslator
 from gradio_blocks import build_video_to_camvideo
 from Nets import CustomResNet18
+from PIL import Image, ImageDraw, ImageFont
 from pytorch_grad_cam import GradCAM, HiResCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 from pytorch_grad_cam.utils.image import show_cam_on_image
 from tqdm import tqdm
+from util import transform
+font = ImageFont.truetype("src/Roboto-Regular.ttf", 16)
 ffmpeg_path = shutil.which('ffmpeg')
 mediapy.set_ffmpeg(ffmpeg_path)
 IMAGE_PATH = os.path.join(os.getcwd(), 'src/examples')
 IMAGES_PER_ROW = 5
+MAXIMAL_FRAMES = 700
+BATCHES_TO_PROCESS = 20
 OUTPUT_FPS = 10
+MAX_OUT_FRAMES = 70
+MODEL = CustomResNet18(111).eval()
 MODEL.load_state_dict(torch.load('src/results/models/best_model.pth', map_location=torch.device('cpu')))
+LANGUAGES_TO_SELECT = {
+    "None": None,
+    "German": "de",
+    "French": "fr",
+    "Spanish": "es",
+    "Italian": "it",
+    "Finnish": "fi",
+    "Ukrainian": "uk",
+    "Japanese": "ja",
+    "Hebrew": "iw"
+}
 CAM_METHODS = {
     "GradCAM": GradCAM,
     "GradCAM++": GradCAMPlusPlus,
 def get_class_idx(name):
     return C_NAME_TO_NUM[name]
+@lru_cache(maxsize=len(LANGUAGES_TO_SELECT.keys())*111)
+def get_translated(to_translate, target_language="German"):
+    target_language = LANGUAGES_TO_SELECT[target_language] if target_language in LANGUAGES_TO_SELECT else target_language
+    if target_language == "en": return to_translate
+    if target_language not in LANGUAGES_TO_SELECT.values(): raise gr.Error(f'Language {target_language} not found.')
+    return GoogleTranslator(source="en", target=target_language).translate(to_translate)
+# for idx in range(111): get_translated(get_class_name(idx))
+with ThreadPoolExecutor(max_workers=30) as executor:
+    # give the executor the list of images and args (in this case, the target language)
+    # and let the executor map the function to the list of images
+    for language in tqdm(LANGUAGES_TO_SELECT.keys(), desc='Preloading translations'):
+        executor.map(get_translated, ALL_CLASSES, [language] * len(ALL_CLASSES))
+def infer_image(image, target_language):
+    if image is None: raise gr.Error("Please upload an image.")
     image.save('src/results/infer_image.png')
     image = transform(image)
     image = image.unsqueeze(0)
     distribution = torch.nn.functional.softmax(output, dim=1)
     ret = defaultdict(float)
     for idx, prob in enumerate(distribution[0]):
+        animal = f'{get_class_name(idx)}'
+        if target_language is not None and target_language != "None":
+            animal += f' ({get_translated(get_class_name(idx), target_language)})'
         ret[animal] = prob.item()
     return ret
+def gradcam(image, colormap="Jet", use_eigen_smooth=False, use_aug_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class", label_image=True, target_lang="German"):
     if image is None:
         raise gr.Error("Please upload an image.")
         colormap = CV2_COLORMAPS[colormap]
     image_width, image_height = image.size
+    if image_width > 6000 or image_height > 6000:
+        raise gr.Error("The image is too big. The maximal size is 6000x6000.")
     MODEL.eval()
     with CAM_METHODS[cam_method](model=MODEL, target_layers=layers) as cam:
         grayscale_cam = cam(input_tensor=image_tensor, targets=targets, aug_smooth=use_aug_smooth, eigen_smooth=use_eigen_smooth)
+        if label_image:
+            predicted_animal = get_class_name(np.argmax(cam.outputs.cpu().data.numpy(), axis=-1)[0])
     grayscale_cam = grayscale_cam[0, :]
     grayscale_cam = cv2.resize(grayscale_cam, (image_width, image_height), interpolation=cv2.INTER_CUBIC)
     else:
         image = image / 255
         visualization = show_cam_on_image(image, grayscale_cam, use_rgb=True, image_weight=alpha, colormap=colormap)
+    if label_image:
+        # add alpha channel to visualization
+        visualization = np.concatenate([visualization, np.ones((image_height, image_width, 1), dtype=np.uint8) * 255], axis=-1)
+        plt_image = Image.fromarray(visualization, mode="RGBA")
+        draw = ImageDraw.Draw(plt_image)
+        draw.rectangle((5, 5, 150, 30), fill=(10, 10, 10, 100))
+        animal = predicted_animal.capitalize()
+        if target_lang is not None and target_lang != "None":
+            animal += f' ({get_translated(animal, target_lang)})'
+        draw.text((10, 7), animal, font=font, fill=(255, 125, 0, 255))
+        visualization = np.array(plt_image)
+    out_image = Image.fromarray(visualization)
+    return out_image
 def gradcam_video(video, colormap="Jet", use_eigen_smooth=False, BWHighlight=False, alpha=0.5, cam_method=GradCAM, layer=None, specific_class="Predicted Class"):
     global OUTPUT_FPS, MAXIMAL_FRAMES, BATCHES_TO_PROCESS, MAX_OUT_FRAMES
+    if video is None: raise gr.Error("Please upload a video.")
     if colormap not in CV2_COLORMAPS.keys():
         raise gr.Error(f"Colormap {colormap} not found in {list(CV2_COLORMAPS.keys())}.")
     else:
     if OUTPUT_FPS == -1: OUTPUT_FPS = fps
     width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    if width > 2000 or height > 2000:
+        raise gr.Error("The video is too big. The maximal size is 2000x2000.")
     print(f'FPS: {fps}, Width: {width}, Height: {height}')
     frames = list()
 def load_examples():
     folder_name_to_header = {
         "AI_Generated": "AI Generated Images",
+        "true": "True Predicted Images (Validation Set)",
+        "false": "False Predicted Images (Validation Set)",
         "others": "Other interesting images from the internet"
     }
     images_description = {
         "AI_Generated": "These images are generated by Dalle3 and Stable Diffusion. All of them are not real images and because of that it is interesting to see how the model predicts them.",
+        "true": "These images are from the validation set and the model predicted them correctly.",
+        "false": "These images are from the validation set and the model predicted them incorrectly. Maybe you can see why the model predicted them incorrectly using the GradCAM visualization. :)",
         "others": "These images are from the internet and are not part of the validation set. They are interesting because most of them show different animals."
     }
     loaded_images = defaultdict(list)
+    for image_type in ["AI_Generated", "true", "false", "others"]:
     # for image_type in os.listdir(IMAGE_PATH):
         full_path = os.path.join(IMAGE_PATH, image_type).replace('\\', '/').replace('//', '/')
         gr.Markdown(f'## {folder_name_to_header[image_type]}')
                 for j in range(IMAGES_PER_ROW):
                     if i * IMAGES_PER_ROW + j >= len(images_to_load): break
                     image = images_to_load[i * IMAGES_PER_ROW + j]
+                    name = f"{image.split('.')[0]}"
                     image = Image.open(os.path.join(full_path, image))
                     # scale so that the longest side is 600px
                     scale = 600 / max(image.size)
         with gr.Column(scale=1):
             pil_logo = Image.open('animals.png')
             logo = gr.Image(value=pil_logo, scale=2, interactive=False, show_download_button=False, show_label=False, container=False, elem_id="logo")
+            animal_translation_target_language = gr.Dropdown(
+                choices=LANGUAGES_TO_SELECT.keys(),
+                label="Translation language for animals",
+                value="German",
+                interactive=True,
+                scale=2,
+            )
     # -------------------------------------------
     #                INPUT IMAGE
     # -------------------------------------------
             user_image = gr.Image(
                 type="pil",
                 label="Upload Your Own Image",
                 interactive=True,
             )
                     info="Top three predicted classes and their confidences.",
                     scale=5,
                 )
+                with gr.Row():
+                    predict_mode_button = gr.Button(value="Predict Animal", label="Predict", info="Click to make a prediction.", scale=6)
+                    predict_mode_button.click(fn=infer_image, inputs=[user_image, animal_translation_target_language], outputs=output, queue=True)
         # -------------------------------------------
         #                EXPLAIN
                         scale=2,
                         info=_info
                     )
+                    with gr.Row():
+                        _info = """
+                            Here you can choose the animal to "explain". If you choose "Predicted Class" the GradCAM visualization will be based on the predicted class.
+                            If you choose a specific class the GradCAM visualization will be based on this class.
+                            For example if you have an image with a dog and a cat, you can select either Cat or Dog and see if the model can focus on the correct animal.
+                        """
+                        animal_to_explain = gr.Dropdown(
+                            choices=["Predicted Class"] + ALL_CLASSES,
+                            label="Animal",
+                            value="Predicted Class",
+                            interactive=True,
+                            scale=4,
+                            info=_info
+                        )
+                        show_predicted_class = gr.Checkbox(
+                            label="Show Predicted Class",
+                            value=True,
+                            interactive=True,
+                            scale=1,
+                        )
                     with gr.Row():
                         _info = """
                         colormap = gr.Dropdown(
                             choices=list(CV2_COLORMAPS.keys()),
                             label="Colormap",
+                            value="Inferno",
                             interactive=True,
                             scale=2,
                             info=_info
                 with gr.Column():
+                    gradcam_mode_button = gr.Button(value="Show GradCAM", label="GradCAM", info="Click to make a prediction.", scale=1)
                     output_cam = gr.Image(
                         type="pil",
                         label="GradCAM",
                         info="GradCAM visualization",
+                        show_label=False,
+                        scale=7,
                     )
+                    _inputs = [user_image, colormap, use_eigen_smooth, use_aug_smooth, bw_highlight, alpha, cam_method, layer, animal_to_explain, show_predicted_class, animal_translation_target_language]
+                    gradcam_mode_button.click(fn=gradcam, inputs=_inputs, outputs=output_cam, queue=True)
         # -------------------------------------------
         #                Video CAM
             loaded_images = load_examples()
             for k in loaded_images.keys():
                 for image in loaded_images[k]:
+                    image.select(fn=lambda x: x, inputs=[image], outputs=[user_image], queue=True, scroll_to_output=True)
 if __name__ == "__main__":
     demo.queue()
+    print("Starting Gradio server...")
+    demo.launch(show_tips=True)

src/Nets.py CHANGED Viewed

@@ -1,47 +1,10 @@
-import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from torchvision import models
-class SimpleCNN(nn.Module):
-    def __init__(self, k_size=3, pool_size=2, num_classes=1):
-        super(SimpleCNN, self).__init__()
-        self.relu = nn.ReLU()
-        # First Convolutional Layer
-        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=k_size, padding=1)
-        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=k_size, stride=1, padding=1)
-        self.pool1 = nn.MaxPool2d(kernel_size=pool_size)
-        # Second Convolutional Layer
-        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=k_size, stride=1, padding=1)
-        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=k_size, stride=1, padding=1)
-        self.pool2 = nn.MaxPool2d(kernel_size=pool_size)
-        self.conv5 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=k_size, stride=1, padding=1)
-        self.conv6 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=k_size, stride=1, padding=1)
-        self.pool3 = nn.MaxPool2d(kernel_size=pool_size)
-        self.conv7 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=k_size, stride=1, padding=1)
-        self.conv8 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=k_size, stride=1, padding=1)
-        self.pool4 = nn.MaxPool2d(kernel_size=pool_size)
-        # Fully Connected Layers
-        self.fc = nn.Linear(64*14*14, num_classes)  # Adjust the input features based on your input image size
-    def forward(self, x):
-        x = self.pool1(self.relu(self.conv2(self.relu(self.conv1(x)))))
-        x = self.pool2(self.relu(self.conv4(self.relu(self.conv3(x)))))
-        x = self.pool3(self.relu(self.conv6(self.relu(self.conv5(x)))))
-        x = self.pool4(self.relu(self.conv8(self.relu(self.conv7(x)))))
-        # print(x.shape)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-        return x
 class CustomResNet18(nn.Module):
     def __init__(self, num_classes=11):
         super(CustomResNet18, self).__init__()
-        self.resnet = models.resnet50(pretrained=True)
         num_features = self.resnet.fc.in_features
         self.resnet.fc = nn.Linear(num_features, num_classes)

 import torch.nn as nn
 from torchvision import models
 class CustomResNet18(nn.Module):
     def __init__(self, num_classes=11):
         super(CustomResNet18, self).__init__()
+        self.resnet = models.resnet18(pretrained=True)
         num_features = self.resnet.fc.in_features
         self.resnet.fc = nn.Linear(num_features, num_classes)

src/Roboto-Regular.ttf ADDED Viewed

Binary file (515 kB). View file

src/cache/val_df.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

src/examples/{false_predicted/squirrel.jpg → false/bee.jpg} RENAMED Viewed

File without changes

src/examples/{false_predicted/chimpanzee.jpg → false/coyote.jpg} RENAMED Viewed

File without changes

src/examples/{true_predicted/cat.jpg → false/donkey.jpg} RENAMED Viewed

File without changes

src/examples/false/goat.jpg ADDED Viewed

Git LFS Details

SHA256: 5407753c8df3d0a5c901215b2ebcf378d4209f332e6a33cdb30a5006bfbf8d09
Pointer size: 131 Bytes
Size of remote file: 457 kB

src/examples/false/hornbill.jpg ADDED Viewed

Git LFS Details

SHA256: 3ccfc55aa247b4eff0483adb1683d2d1d4dd0790dcaff81b3e243e4659dd1bf0
Pointer size: 130 Bytes
Size of remote file: 83.5 kB

src/examples/false_predicted/starfish.jpg DELETED Viewed

Git LFS Details

SHA256: d87e919ecb6d94c51affd428927457c26683b7ab32140418337403ebe26a0d45
Pointer size: 131 Bytes
Size of remote file: 179 kB

src/examples/true/dolphin.jpg ADDED Viewed

Git LFS Details

SHA256: 729e4bfab228c912f14733ef32107583ed4cdaa2e0f197ff259f6981f24772ac
Pointer size: 131 Bytes
Size of remote file: 118 kB

src/examples/true/dragonfly.jpg ADDED Viewed

Git LFS Details

SHA256: 8a33acb02f7e9686f4642f3a878cbf7728c875cedda9e1d3ff4db0831189d5df
Pointer size: 131 Bytes
Size of remote file: 139 kB

src/examples/{false_predicted → true}/koala.jpg RENAMED Viewed

File without changes

src/examples/{false_predicted → true}/sheep.jpg RENAMED Viewed

File without changes

src/examples/true/squid.jpg ADDED Viewed

Git LFS Details

SHA256: 1be527b0a94f05e7d5b5178b187ec95bd2d3fb992ecc38af50afbfef775d65cb
Pointer size: 130 Bytes
Size of remote file: 19.8 kB

src/examples/true_predicted/cockroach.jpg DELETED Viewed

Git LFS Details

SHA256: 04e21d254a0e49c8a868b47de902e7eb6571ea28d9413940f817e175a87f3275
Pointer size: 131 Bytes
Size of remote file: 107 kB

src/examples/true_predicted/flamingo.jpg DELETED Viewed

Git LFS Details

SHA256: e3c90ae9176e11b1dcc73f5d9c81b94433bd9d8228919340db80f404e9f6ced4
Pointer size: 131 Bytes
Size of remote file: 619 kB

src/examples/true_predicted/gorilla.jpg DELETED Viewed

Git LFS Details

SHA256: 27ce1f1437356309406de0341c4680ea9d6f72d90fd49133db3843c0af272fc8
Pointer size: 130 Bytes
Size of remote file: 12.1 kB

src/examples/true_predicted/grasshopper.jpg DELETED Viewed

Git LFS Details

SHA256: 76c372432e4bc478c35b157444cb7923d7c3827f8d3fb7cea8f5625a4b94ac51
Pointer size: 130 Bytes
Size of remote file: 10.7 kB

src/gradio_blocks.py CHANGED Viewed

@@ -29,7 +29,7 @@ def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gra
                 )
             video_layer = gr.Radio(
-                        LAYERS.keys(),
                         label="Layer",
                         value="layer4",
                         interactive=True,
@@ -48,7 +48,7 @@ def build_video_to_camvideo(CAM_METHODS, CV2_COLORMAPS, LAYERS, ALL_CLASSES, gra
                 colormap = gr.Dropdown(
                     choices=list(CV2_COLORMAPS.keys()),
                     label="Colormap",
-                    value="Jet",
                     interactive=True,
                     scale=2,
                 )

                 )
             video_layer = gr.Radio(
+                        [f"layer{i}" for i in range(1, 5)],
                         label="Layer",
                         value="layer4",
                         interactive=True,
                 colormap = gr.Dropdown(
                     choices=list(CV2_COLORMAPS.keys()),
                     label="Colormap",
+                    value="Inferno",
                     interactive=True,
                     scale=2,
                 )

src/header.md CHANGED Viewed

@@ -2,9 +2,9 @@
 This project was created by [Ilyesse](https://github.com/ilyii) and [Gabriel](https://github.com/Gabriel9753) as part of the Explainable Machine Learning module at the [University of Applied Sciences Karlsruhe](https://www.h-ka.de/).
-The dataset used in this project is the [Animal Image Dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals) from Kaggle, comprising 90 different animal species that needed to be classified. We also added approx. 1000 AI generated images for all classes to get a more diverse dataset and also improve the performance of the model.
-The employed model is ResNet50, which was trained on the dataset using transfer learning techniques.
 Translation of animal names by [deep-translator](https://pypi.org/project/deep-translator/).
 ## Usage 🦎

 This project was created by [Ilyesse](https://github.com/ilyii) and [Gabriel](https://github.com/Gabriel9753) as part of the Explainable Machine Learning module at the [University of Applied Sciences Karlsruhe](https://www.h-ka.de/).
+The dataset used in this project is the [Animal Image Dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals) from Kaggle, comprising 90 different animal species that needed to be classified. To cover a few more animals, we added 21 further unique classes, so we ended up working with our own 111-animal dataset. We also added approx. 1000 AI-generated images for all classes to make the dataset more diverse and to improve the performance of the model.
+The employed model is ResNet18, which was trained on the dataset using transfer learning techniques.
 Translation of animal names by [deep-translator](https://pypi.org/project/deep-translator/).
 ## Usage 🦎

src/results/gradcam_video.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9617d53ad717194350c99f6b1d2a172f01e712e4109c76b16fe3f70f32c4570
-size 772080

 version https://git-lfs.github.com/spec/v1
+oid sha256:d88ec14ff35116bf5d8bd65454616aba242d8f79bde4dcbd717aabbcc910670a
+size 917687

src/results/infer_image.png CHANGED Viewed

Git LFS Details

SHA256: 8a1d8cf8974330c3e6fe91b98860ca140fb46edfb6a1f5c8448c8d5e2ed479c7
Pointer size: 131 Bytes
Size of remote file: 339 kB

Git LFS Details

SHA256: 5fb27d68a14ee2dd5d2f99e5b24cda08ea7245ffb06731108036937eed56b9b5
Pointer size: 131 Bytes
Size of remote file: 424 kB

src/results/models/best_model.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd7be6abdcf8f64be68324d3b6d82cc4f5e02a12e6462b63b2c190d5a0a4182a
-size 95091582

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6a3f852efacebef8dee4ba74c0a73a7f33bf2180c4272dbf233a5c6157d7531
+size 45015274

src/util.py CHANGED Viewed

@@ -1,83 +1,9 @@
 import torchvision.transforms as transforms
-from torch.utils.data import DataLoader, Dataset
-from sklearn.preprocessing import LabelEncoder
-from tqdm import tqdm
-from PIL import Image
 import torch
-import imagehash
-ImageCache = None
-class AnimalDataset(Dataset):
-    def __init__(self, df, transform=None):
-        self.paths = df["path"].values
-        self.targets = df["target"].values
-        self.encoded_target = df['encoded_target'].values
-        self.transform = transform
-        self.images = []
-        for path in tqdm(self.paths):
-            self.images.append(Image.open(path).convert("RGB").resize((224, 224)))
-    def __len__(self):
-        return len(self.paths)
-    def __getitem__(self, idx):
-        img = self.images[idx]
-        if self.transform:
-            img = self.transform(img)
-        target = self.targets[idx]
-        encoded_target = torch.tensor(self.encoded_target[idx]).type(torch.LongTensor)
-        return img, encoded_target, target
-train_transform = transforms.Compose([
-    transforms.Resize((224,224)),
-    transforms.RandomHorizontalFlip(),
-    transforms.RandomRotation(10),
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-])
 # Define the transformation pipeline
 transform = transforms.Compose([
     transforms.Resize((224,224)),
     transforms.ToTensor(),  # Convert the images to PyTorch tensors
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-])
-class CustomImageCache:
-    def __init__(self, cache_size=50, debug=False):
-        self.cache = dict()
-        self.cache_size = 50
-        self.debug = debug
-        self.cache_hits = 0
-        self.cache_misses = 0
-    def __getitem__(self, image):
-        if isinstance(image, dict):
-            # Its the image and a mask as pillow both -> Combine them to one image
-            image = Image.blend(image["image"], image["mask"], alpha=0.5)
-        key = imagehash.average_hash(image)
-        if key in self.cache:
-            if self.debug: print("Cache hit!")
-            self.cache_hits += 1
-            return self.cache[key]
-        else:
-            if self.debug: print("Cache miss!")
-            self.cache_misses += 1
-            if len(self.cache.keys()) >= self.cache_size:
-                if self.debug: print("Cache full, popping item!")
-                self.cache.popitem()
-            self.cache[key] = image
-            return self.cache[key]
-    def __len__(self):
-        return len(self.cache.keys())
-    def print_info(self):
-        print(f"Cache size: {len(self)}")
-        print(f"Cache hits: {self.cache_hits}")
-        print(f"Cache misses: {self.cache_misses}")
-def imageCacheWrapper(fn):
-    def wrapper(image):
-        return fn(ImageCache[image])
-    return wrapper

 import torchvision.transforms as transforms
 import torch
 # Define the transformation pipeline
 transform = transforms.Compose([
     transforms.Resize((224,224)),
     transforms.ToTensor(),  # Convert the images to PyTorch tensors
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])