Commit · c3d45c0
1 Parent(s): 5a5e816
Commit to hf space
Files changed:
- .gitignore +4 -0
- app.py +447 -0
- requirements.txt +8 -0
- src/__pycache__/dataloader.cpython-312.pyc +0 -0
- src/__pycache__/model.cpython-312.pyc +0 -0
- src/__pycache__/trainer.cpython-312.pyc +0 -0
- src/dataloader.py +40 -0
- src/model.py +383 -0
- src/trainer.py +203 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
/dataset
/trained_model
*.pt
streamlitapp.py
app.py
ADDED
@@ -0,0 +1,447 @@
import gradio as gr
import zipfile
import os
import io
import time
import shutil

import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

from src.dataloader import ImageDataset
from src.model import Classifier, Config, CNNFeatureExtractor, ClassicalFeatureExtractor
from src.trainer import ModelTrainer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def unzip_dataset(zip_file):
    base_name = os.path.splitext(os.path.basename(zip_file.name))[0]
    dataset_path = os.path.join(".", base_name)

    os.makedirs(dataset_path, exist_ok=True)

    with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
        zip_ref.extractall(dataset_path)
    # If the archive wraps everything in a single folder, descend into it.
    extracted_items = os.listdir(dataset_path)
    if len(extracted_items) == 1 and os.path.isdir(os.path.join(dataset_path, extracted_items[0])):
        dataset_path = os.path.join(dataset_path, extracted_items[0])

    print(f"Dataset extracted to: {dataset_path}")
    class_names = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
    print(f"Detected classes: {class_names}")

    for cls in class_names:
        cls_path = os.path.join(dataset_path, cls)
        images = os.listdir(cls_path)
        print(f"Class '{cls}' has {len(images)} images. Sample: {images[:3]}")

    return dataset_path

cnn_history = {
    "train_acc": [],
    "train_loss": [],
    "val_acc": [],
    "val_loss": []
}

classic_history = {
    "train_acc": [],
    "train_loss": [],
    "val_acc": [],
    "val_loss": []
}

training_interrupt = False

def fig_to_image(fig):
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    buf.seek(0)
    img = Image.open(buf).convert("RGB")
    img_array = np.array(img)
    plt.close(fig)
    return img_array

def plot(datas, labels, xlabel, ylabel, title, figsize=(16, 8)):
    fig, ax = plt.subplots(figsize=figsize)
    for data, label in zip(datas, labels):
        ax.plot(range(1, len(data) + 1), data, label=label)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    return fig_to_image(fig)

class TrainingInterrupted(Exception):
    pass

def stop_training():
    global training_interrupt
    training_interrupt = True
    return "Training stopped."

def train(cnn, classic, train_set, val_set, batch_size, lr, epochs, device="cpu", visualize_every=5):
    global training_interrupt, cnn_history, classic_history
    training_interrupt = False
    cnn_done = False
    cnn_history = {"train_acc": [], "train_loss": [], "val_acc": [], "val_loss": []}
    classic_history = {"train_acc": [], "train_loss": [], "val_acc": [], "val_loss": []}
    try:
        if training_interrupt:
            raise TrainingInterrupted("Training was interrupted!")
        cnntrainer = ModelTrainer(cnn, train_set, val_set, batch_size, lr, device=device, return_fig=True)
        classictrainer = ModelTrainer(classic, train_set, val_set, batch_size, lr, device=device, return_fig=True)
        cnn_text = ""
        classic_text = ""
        all_cnn_fig = []
        all_classic_fig = []
        start_time = time.time()
        for i, (cnn_train_loss, cnn_train_acc, cnn_val_loss, cnn_val_acc, cnn_fig) in enumerate(cnntrainer.train(epochs, visualize_every=visualize_every)):
            if training_interrupt:
                raise TrainingInterrupted("Training was interrupted!")
            if i == epochs:
                # The trainer's final yield carries history lists, not per-epoch scalars.
                break
            if cnn_fig is not None:
                for fig in cnn_fig:
                    fig.suptitle(f"Epoch {i+1}", fontsize=16)
                    all_cnn_fig.append(fig_to_image(fig))
            cnn_text += f"Epoch {i+1}: Train Loss: {cnn_train_loss:.4f}, Train Acc: {cnn_train_acc:.4f}, Val Loss: {cnn_val_loss:.4f}, Val Acc: {cnn_val_acc:.4f}\n"
            cnn_history['train_acc'].append(cnn_train_acc)
            cnn_history['train_loss'].append(cnn_train_loss)
            cnn_history['val_acc'].append(cnn_val_acc)
            cnn_history['val_loss'].append(cnn_val_loss)

            yield cnn_text, all_cnn_fig, classic_text, all_classic_fig, cnn_done
        cnn_done = True
        dt = time.time() - start_time
        cnn_text += f'Time taken: {dt:.2f} seconds\n'
        yield cnn_text, all_cnn_fig, classic_text, all_classic_fig, cnn_done
        start_time = time.time()
        for i, (classic_train_loss, classic_train_acc, classic_val_loss, classic_val_acc, classic_fig) in enumerate(classictrainer.train(epochs, visualize_every=visualize_every)):
            if training_interrupt:
                raise TrainingInterrupted("Training was interrupted!")
            if i == epochs:
                break
            if classic_fig is not None:
                for fig in classic_fig:
                    fig.suptitle(f"Epoch {i+1}", fontsize=16)
                    all_classic_fig.append(fig_to_image(fig))
            classic_history['train_acc'].append(classic_train_acc)
            classic_history['train_loss'].append(classic_train_loss)
            classic_history['val_acc'].append(classic_val_acc)
            classic_history['val_loss'].append(classic_val_loss)
            classic_text += f"Epoch {i+1}: Train Loss: {classic_train_loss:.4f}, Train Acc: {classic_train_acc:.4f}, Val Loss: {classic_val_loss:.4f}, Val Acc: {classic_val_acc:.4f}\n"
            yield cnn_text, all_cnn_fig, classic_text, all_classic_fig, cnn_done
        dt = time.time() - start_time
        classic_text += f'Time taken: {dt:.2f} seconds\n'
        yield cnn_text, all_cnn_fig, classic_text, all_classic_fig, cnn_done
    except TrainingInterrupted as e:
        print(e)
        return

def train_model(zip_file, batch_size, lr, epochs, seed, vis_every,
                img_width, img_height, fc_num_layers,
                in_channels, conv_hidden_dim, dropout,
                classical_downsample,
                hog_orientations, hog_pixels_per_cell, hog_cells_per_block, hog_block_norm,
                canny_sigma, canny_low, canny_high,
                gaussian_ksize, gaussian_sigmaX, gaussian_sigmaY,
                harris_block_size, harris_ksize, harris_k,
                shi_max_corners, shi_quality_level, shi_min_distance,
                lbp_P, lbp_R,
                gabor_ksize, gabor_sigma, gabor_theta, gabor_lambda, gabor_gamma):
    config = Config()
    global training_interrupt
    training_interrupt = False
    BATCH_SIZE = batch_size
    DATASET_PATH = unzip_dataset(zip_file)
    SEED = seed
    EPOCHS = epochs
    LR = lr
    config.img_size = (int(img_width), int(img_height))
    config.fc_num_layers = int(fc_num_layers)
    # CNN config
    config.in_channels = int(in_channels)
    config.conv_hidden_dim = int(conv_hidden_dim)
    config.dropout = dropout
    # Classical config
    config.classical_downsample = int(classical_downsample)
    config.hog_orientations = int(hog_orientations)
    config.hog_pixels_per_cell = (int(hog_pixels_per_cell), int(hog_pixels_per_cell))
    config.hog_cells_per_block = (int(hog_cells_per_block), int(hog_cells_per_block))
    config.hog_block_norm = hog_block_norm
    config.canny_sigma = int(canny_sigma)
    config.canny_low = canny_low
    config.canny_high = canny_high
    config.gaussian_ksize = (int(gaussian_ksize), int(gaussian_ksize))
    config.gaussian_sigmaX = gaussian_sigmaX
    config.gaussian_sigmaY = gaussian_sigmaY
    config.harris_block_size = int(harris_block_size)
    config.harris_ksize = int(harris_ksize)
    config.harris_k = harris_k
    config.shi_max_corners = int(shi_max_corners)
    config.shi_quality_level = shi_quality_level
    config.shi_min_distance = int(shi_min_distance)
    config.lbp_P = int(lbp_P)
    config.lbp_R = int(lbp_R)
    config.gabor_ksize = int(gabor_ksize)
    config.gabor_sigma = int(gabor_sigma)
    config.gabor_theta = int(gabor_theta)
    config.gabor_lambda = int(gabor_lambda)
    config.gabor_gamma = gabor_gamma
    cnn_history_plots = []
    classical_history_plots = []
    cnn_plotted = False
    # Pre-initialize so the except-branch yield below cannot hit unbound names
    # when an error occurs before the first training iteration.
    cnn_text, cnn_fig, classic_text, classic_fig = "", None, "", None
    try:
        dataset = ImageDataset(DATASET_PATH, config.img_size)
        labels = [item['id'] for item in dataset.data]
        train_idx, validation_idx = train_test_split(np.arange(len(dataset)),
                                                     test_size=0.2,
                                                     random_state=SEED,
                                                     shuffle=True,
                                                     stratify=labels)
        train_dataset = Subset(dataset, train_idx)
        val_dataset = Subset(dataset, validation_idx)
        cnnbackbone = CNNFeatureExtractor(config).to(device)
        cnnmodel = Classifier(cnnbackbone, train_dataset.dataset.classes, config).to(device)
        classicbackbone = ClassicalFeatureExtractor(config)
        classicmodel = Classifier(classicbackbone, train_dataset.dataset.classes, config).to(device)
        for cnn_text, cnn_fig, classic_text, classic_fig, cnn_done in train(cnnmodel, classicmodel, train_dataset, val_dataset, BATCH_SIZE, LR, EPOCHS, device, visualize_every=vis_every):
            if cnn_done and not cnn_plotted:
                cnn_plotted = True  # append the CNN summary plots exactly once
                cnn_history_plots.append(plot([cnn_history['train_acc'], cnn_history['val_acc']], ['Training Accuracy', 'Validation Accuracy'], 'Epochs', 'Accuracy', 'Training vs Validation Accuracy'))
                cnn_history_plots.append(plot([cnn_history['train_loss'], cnn_history['val_loss']], ['Training Loss', 'Validation Loss'], 'Epochs', 'Loss', 'Training vs Validation Loss'))

            yield cnn_text, cnn_fig, classic_text, classic_fig, cnn_history_plots, classical_history_plots
        classical_history_plots.append(plot([classic_history['train_acc'], classic_history['val_acc']], ['Training Accuracy', 'Validation Accuracy'], 'Epochs', 'Accuracy', 'Training vs Validation Accuracy'))
        classical_history_plots.append(plot([classic_history['train_loss'], classic_history['val_loss']], ['Training Loss', 'Validation Loss'], 'Epochs', 'Loss', 'Training vs Validation Loss'))

        yield cnn_text, cnn_fig, classic_text, classic_fig, cnn_history_plots, classical_history_plots

    except RuntimeError as e:
        print(e)
        yield cnn_text, cnn_fig, classic_text, classic_fig, cnn_history_plots, classical_history_plots
        return
    finally:
        if os.path.exists(DATASET_PATH):
            shutil.rmtree(DATASET_PATH)
            print(f"Temporary dataset folder '{DATASET_PATH}' removed.")

    cnnmodel.save(os.path.join('trained_model', 'cnn_model.pt'))
    classicmodel.save(os.path.join('trained_model', 'classic_model.pt'))


intro_html = """
<div style="
    border-left:6px solid #2563eb;
    border-right:6px solid #2563eb;
    padding:16px;
    border-radius:8px;
    font-size:16px;
    line-height:1.6;
    text-align: justify;
    text-justify: inter-word;
">
<h1 style="margin-top:0;">Welcome to the Object Classifier Playground!</h1>
<p>
Object classification is a field of computer vision in which we train a computer to identify what an object is.
Traditionally, the task consists of two stages: feature extraction followed by a classification model.
Many feature extraction algorithms exist, such as corner detection, edge detection, Local Binary Patterns (LBP), and the Histogram of Oriented Gradients (HOG).
The extracted features are then passed to a machine learning model, specifically a classifier such as an SVM, k-Nearest Neighbors, or Naive Bayes, which learns to distinguish object categories based on those features.
</p>
<p>
With the advancement of deep learning, the object classification task has been significantly simplified: hand-crafted feature extraction algorithms are rarely used anymore.
This is not because feature extraction has become obsolete; rather, it has become part of the learning process itself. In deep learning we use a model called a Convolutional Neural Network (CNN).
A convolutional network consists of two main parts: the convolution layers and the fully connected layers. A convolution layer applies a filter, usually called a convolutional kernel, to the image; the value of each cell in the kernel is initially random and is learned during training.
</p>
<img src="https://raw.githubusercontent.com/VJyzCELERY/ClassicalObjectClassifier/refs/heads/main/assets/conv-illus.jpg">
<p>
For more detail on how convolutional neural networks work, you can refer to this <a href="https://viso.ai/deep-learning/convolution-operations/">link</a>.
</p>
<p>
In practice, the convolution operation extracts features for a downstream machine learning or deep learning model to process. Convolution by itself does not directly produce an object classification, so even a deep model such as a CNN
still follows the traditional feature-extraction-then-classification pipeline. Its strength is that the convolution layers learn the weights needed to produce the most useful features, and a single convolution layer can produce tens or hundreds of feature channels.
</p>
<p>
This program will not go deeply into traditional feature extraction or the full inner workings of CNNs. Instead, it provides a playground that demonstrates the feature extraction each approach performs and how the two differ from one another.
</p>
<h2 style="margin-top:0;">The Model Architecture</h2>
<p>
The model architecture used in this program follows a CNN design: convolution layers followed by a fully connected classifier head. However, the feature extraction stage (the convolution layers) can be swapped out for a traditional feature extraction pipeline.
In theory the classical pipeline should perform comparably, or slightly worse, since it does essentially what the convolution layers do, while convolution layers can extract far more features that are both trainable and task-specific.
</p>
<p>
For more detail, refer to <a href="https://github.com/VJyzCELERY/ClassicalObjectClassifier">https://github.com/VJyzCELERY/ClassicalObjectClassifier</a>, which includes a paper explaining the code and its method.
</p>

</div>
"""

with gr.Blocks(title="Object Classifier Playground") as demo:
    with gr.Tab("Introduction"):
        gr.HTML(intro_html)
    with gr.Tab("Training"):
        with gr.Row():
            zip_file = gr.File(label='Upload Dataset in Zip', file_types=['.zip'], file_count='single', interactive=True)
            batch_size = gr.Number(value=32, label='Batch Size', interactive=True, precision=0)
            lr = gr.Number(value=1e-3, label='Learning Rate', interactive=True)
            epochs = gr.Number(value=20, label="Epochs", interactive=True, precision=0)
            seed = gr.Number(value=42, label='Seed', interactive=True, precision=0)
            vis_every = gr.Number(value=5, label='Visualize Validation Every (Epochs)', interactive=True, precision=0)
        with gr.Row():
            img_width = gr.Number(value=128, label='Image Width', interactive=True, precision=0)
            img_height = gr.Number(value=128, label='Image Height', interactive=True, precision=0)
            fc_num_layers = gr.Number(value=3, label="Fully Connected Layer Depth", interactive=True, precision=0)
            dropout = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.05, label='Fully Connected Layer Dropout', interactive=True)
        gr.Markdown("# CNN Feature Extractor Configuration")
        with gr.Accordion(label="CNN Settings", open=False):
            with gr.Row():
                in_channels = gr.Number(value=3, label='Input Color Channel Amount', interactive=True, precision=0)
                conv_hidden_dim = gr.Number(value=3, label='Conv Hidden Dim', interactive=True, precision=0)
        gr.Markdown("# Classical Feature Extractor Configuration")
        with gr.Accordion(label='Classical Feature Extractor Settings', open=False):
            with gr.Row():
                classical_downsample = gr.Number(value=1, label='Classical Extractor Downsampling Amount', interactive=True, precision=0)
            with gr.Row():
                hog_orientations = gr.Number(value=9, label='HoG Orientations', interactive=True, precision=0)
                hog_pixels_per_cell = gr.Number(value=16, label='HoG pixels per cell', interactive=True, precision=0)
                hog_cells_per_block = gr.Number(value=2, label='HoG cells per block', interactive=True, precision=0)
                hog_block_norm = gr.Dropdown(['L2-Hys'], value='L2-Hys', label='HoG Block Normalization Method', interactive=True)
            with gr.Row():
                canny_sigma = gr.Number(value=1.0, label='Canny Sigma Value', interactive=True)
                canny_low = gr.Number(value=100, label='Canny Low Threshold', interactive=True, precision=0)
                canny_high = gr.Number(value=200, label='Canny High Threshold', interactive=True, precision=0)
            with gr.Row():
                gaussian_ksize = gr.Number(value=3, label='Gaussian Kernel Size', interactive=True, precision=0)
                gaussian_sigmaX = gr.Number(value=1.0, label='Gaussian Sigma X Value', interactive=True)
                gaussian_sigmaY = gr.Number(value=1.0, label='Gaussian Sigma Y Value', interactive=True)
            with gr.Row():
                harris_block_size = gr.Number(value=2, label='Harris Corner Block Size', interactive=True, precision=0)
                harris_ksize = gr.Number(value=3, label='Harris Corner Kernel Size', interactive=True, precision=0)
                harris_k = gr.Slider(minimum=0.01, maximum=0.1, value=0.04, step=0.005, label='Harris Corner K value', interactive=True)
            with gr.Row():
                shi_max_corners = gr.Number(value=100, label='Shi-Tomasi Max Corners', interactive=True, precision=0)
                shi_quality_level = gr.Number(value=0.01, label='Shi-Tomasi Quality Level', interactive=True)
                shi_min_distance = gr.Number(value=10, label='Shi-Tomasi Min Distance', interactive=True, precision=0)
            with gr.Row():
                lbp_P = gr.Number(value=8, label='LBP P Value', interactive=True, precision=0)
                lbp_R = gr.Number(value=1, label='LBP R Value', interactive=True, precision=0)
            with gr.Row():
                gabor_ksize = gr.Number(value=21, label="Gabor Kernel Size", interactive=True, precision=0)
                gabor_sigma = gr.Number(value=5, label="Gabor Sigma", interactive=True, precision=0)
                gabor_theta = gr.Number(value=0, label="Gabor Theta", interactive=True, precision=0)
                gabor_lambda = gr.Number(value=10, label="Gabor Lambda", interactive=True, precision=0)
                gabor_gamma = gr.Number(value=0.5, label="Gabor Gamma", interactive=True)
        with gr.Column():
            train_btn = gr.Button("Train Model", variant='secondary', interactive=True)
            stop_btn = gr.Button("Stop Training")

        with gr.Column():
            with gr.Column():
                gr.Markdown("### CNN Training Log")
                cnn_log = gr.Textbox(label="CNN Log", interactive=False)
                cnn_fig = gr.Gallery(label="CNN Batch Visualization", interactive=False, object_fit='fill', columns=1)
                cnn_plots = gr.Gallery(label="CNN Training Performance", interactive=False, object_fit='fill', columns=1)
            with gr.Column():
                gr.Markdown("### Classical Training Log")
                classical_log = gr.Textbox(label="Classical Log", interactive=False)
                classical_fig = gr.Gallery(label="Classical Batch Visualization", interactive=False, object_fit='fill', columns=1)
                classical_plots = gr.Gallery(label="Classical Training Performance", interactive=False, object_fit='fill', columns=1)
        stop_btn.click(fn=stop_training, inputs=[], outputs=[])
        train_btn.click(fn=train_model,
                        inputs=[zip_file, batch_size, lr, epochs, seed, vis_every,
                                img_width, img_height, fc_num_layers,
                                in_channels, conv_hidden_dim, dropout,
                                classical_downsample,
                                hog_orientations, hog_pixels_per_cell, hog_cells_per_block, hog_block_norm,
                                canny_sigma, canny_low, canny_high,
                                gaussian_ksize, gaussian_sigmaX, gaussian_sigmaY,
                                harris_block_size, harris_ksize, harris_k,
                                shi_max_corners, shi_quality_level, shi_min_distance,
                                lbp_P, lbp_R,
                                gabor_ksize, gabor_sigma, gabor_theta, gabor_lambda, gabor_gamma],
                        outputs=[cnn_log, cnn_fig, classical_log, classical_fig, cnn_plots, classical_plots]
                        )

    def make_figure_from_image(img):
        # Return the figure for gr.Plot; no plt.show() is needed server-side.
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.imshow(img)
        ax.axis("off")
        return fig

    def predict_image(upload, show_original, max_channels):
        img = cv2.cvtColor(cv2.imread(upload), cv2.COLOR_BGR2RGB)
        model_base_path = "./trained_model"
        classic_model_path = os.path.join(model_base_path, 'classic_model.pt')
        cnn_model_path = os.path.join(model_base_path, 'cnn_model.pt')
        os.makedirs(model_base_path, exist_ok=True)
        # load() is a staticmethod of Classifier, so it is called via the class.
        if os.path.exists(classic_model_path):
            classic_model = Classifier.load(classic_model_path, ClassicalFeatureExtractor, device=device)
        else:
            # Must match the six outputs wired to prediction_btn below.
            return "No Classical Model trained", None, None, None, None, None
        if os.path.exists(cnn_model_path):
            cnn_model = Classifier.load(cnn_model_path, CNNFeatureExtractor, device=device)
        else:
            return "No CNN Model trained", None, None, None, None, None
        cnn_predict = cnn_model.predict(img)
        classic_predict = classic_model.predict(img)
        cnn_features = cnn_model.visualize_feature(img, max_channels=max_channels, show=False)
        classical_features = classic_model.visualize_feature(img, show_original=show_original, show=False)
        return None, make_figure_from_image(img), cnn_predict, classic_predict, cnn_features, classical_features

    with gr.Tab("Inference"):
        with gr.Row():
            image_upload = gr.File(file_count='single', file_types=['image'], label='Upload Image to Infer', interactive=True)
            with gr.Column():
                gr.Markdown("# CNN Settings")
                with gr.Accordion(open=False):
                    cnn_max_channel_visual = gr.Number(value=8, precision=0, label='Max CNN Channels to Preview', interactive=True)
            with gr.Column():
                gr.Markdown("# Classical Settings")
                with gr.Accordion(open=False):
                    classic_show_original = gr.Checkbox(value=True, label='Show Original Image as Features')
        with gr.Column():
            gr.Markdown("# Predictions")
            verbose = gr.Markdown()
            image_preview = gr.Plot(value=None, label="Input Image")
            cnn_features = gr.Gallery(label='CNN Extracted Features', columns=1, object_fit='fill', interactive=False)
            classical_features = gr.Gallery(label='Classical Extracted Features', columns=1, object_fit='fill', interactive=False)
            cnn_prediction = gr.Textbox(interactive=False, value='No Predictions', label='CNN Predictions')
            classical_prediction = gr.Textbox(interactive=False, value='No Predictions', label='Classical Model Predictions')
        prediction_btn = gr.Button('Predict', variant='primary')

        prediction_btn.click(
            fn=predict_image,
            inputs=[image_upload, classic_show_original, cnn_max_channel_visual],
            outputs=[verbose, image_preview, cnn_prediction, classical_prediction, cnn_features, classical_features]
        )


if __name__ == "__main__":
    demo.launch(share=False)
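Usage note: the training tab expects the uploaded zip to contain one top-level folder per class, since unzip_dataset() treats every top-level directory as a class name and ImageDataset reads each file inside a class folder as an image of that class. A minimal sketch of packing such an archive (the raw_images/cat and raw_images/dog paths are hypothetical):

import os
import zipfile

# Pack raw_images/<class>/* into animals.zip so that the archive's
# top level is the class folders themselves.
with zipfile.ZipFile('animals.zip', 'w') as zf:
    for cls in ('cat', 'dog'):
        cls_dir = os.path.join('raw_images', cls)
        for fname in os.listdir(cls_dir):
            zf.write(os.path.join(cls_dir, fname), arcname=os.path.join(cls, fname))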
requirements.txt
ADDED
@@ -0,0 +1,8 @@
gradio==5.5.0
pydantic==2.10.6
torch
torchvision
opencv-python
scikit-learn
matplotlib
scikit-image
src/__pycache__/dataloader.cpython-312.pyc
ADDED
Binary file (2.57 kB).

src/__pycache__/model.cpython-312.pyc
ADDED
Binary file (22.9 kB).

src/__pycache__/trainer.cpython-312.pyc
ADDED
Binary file (9.76 kB).
src/dataloader.py
ADDED
@@ -0,0 +1,40 @@
from torch.utils.data import Dataset
import torch
import os
import numpy as np
import cv2

def collate_fn(batch):
    imgs = [img for img, _ in batch]
    labels = torch.tensor([label for _, label in batch])
    return imgs, labels


class ImageDataset(Dataset):
    def __init__(self, root_path: str, img_size=(256, 256)):
        classes = os.listdir(root_path)
        self.img_size = img_size
        self.classes = classes
        data = []
        for idx, class_name in enumerate(classes):
            class_path = os.path.join(root_path, class_name)
            files = os.listdir(class_path)
            for file in files:
                filepath = os.path.join(class_path, file)
                data.append({"image_path": filepath, "label": class_name, "id": idx})
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        curr = self.data[idx]
        label = curr['id']
        img_path = curr['image_path']
        img = cv2.imread(img_path)
        img = cv2.resize(img, self.img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        return img, label
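Usage note: collate_fn deliberately returns the images as a plain Python list rather than a stacked tensor; the feature extractors in src/model.py do the stacking themselves. A minimal sketch of loading batches (the 'dataset' path is hypothetical and must contain one subfolder per class):

from torch.utils.data import DataLoader
from src.dataloader import ImageDataset, collate_fn

data = ImageDataset('dataset', img_size=(128, 128))
loader = DataLoader(data, batch_size=4, shuffle=True, collate_fn=collate_fn)

imgs, labels = next(iter(loader))
print(type(imgs), imgs[0].shape, imgs[0].dtype)  # list, (128, 128, 3), float32 in [0, 1]
print(labels)                                    # tensor of integer class ids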
src/model.py
ADDED
@@ -0,0 +1,383 @@
import torch
import torch.nn as nn
import cv2
import numpy as np
from dataclasses import dataclass
from skimage.feature import hog, local_binary_pattern
import matplotlib.pyplot as plt
import os
import io
from PIL import Image

@dataclass
class Config:
    # Field annotations are required for @dataclass to actually register
    # these as fields; without them the decorator is a no-op.
    img_size: tuple = (256, 256)
    in_channels: int = 3
    fc_num_layers: int = 3
    conv_hidden_dim: int = 3
    conv_kernel_size: int = 3
    dropout: float = 0.2
    classical_downsample: int = 1
    # HOG
    hog_orientations: int = 9
    hog_pixels_per_cell: tuple = (16, 16)
    hog_cells_per_block: tuple = (2, 2)
    hog_block_norm: str = 'L2-Hys'

    # Canny
    canny_sigma: float = 1.0
    canny_low: int = 100
    canny_high: int = 200

    # Gaussian
    gaussian_ksize: tuple = (3, 3)
    gaussian_sigmaX: float = 1.0
    gaussian_sigmaY: float = 1.0

    # Harris corners
    harris_block_size: int = 2
    harris_ksize: int = 3
    harris_k: float = 0.04

    # Shi-Tomasi corners
    shi_max_corners: int = 100
    shi_quality_level: float = 0.01
    shi_min_distance: int = 10

    # LBP
    lbp_P: int = 8
    lbp_R: int = 1

    # Gabor filters
    gabor_ksize: int = 21
    gabor_sigma: int = 5
    gabor_theta: int = 0
    gabor_lambda: int = 10
    gabor_gamma: float = 0.5

class CNNFeatureExtractor(nn.Module):
    def __init__(self, config: Config):
        super().__init__()
        layers = []
        self.in_channels = config.in_channels
        in_channel = config.in_channels
        self.img_size = config.img_size
        out_channel = 32
        for i in range(config.conv_hidden_dim):
            layers.append(nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=config.conv_kernel_size, stride=1, padding=1))
            layers.append(nn.BatchNorm2d(out_channel))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))
            in_channel = out_channel
            out_channel *= 2
        self.layers = nn.Sequential(*layers)

    def get_device(self):
        return next(self.parameters()).device

    def forward(self, x):
        if isinstance(x, list):
            if isinstance(x[0], np.ndarray):
                x = np.stack(x, axis=0)
        if isinstance(x, np.ndarray):
            if len(x.shape) == 2:
                # Grayscale HW -> HW1 -> 1HW -> 11HW
                x = x[:, :, None]
                x = x.transpose(2, 0, 1)
                x = np.expand_dims(x, 0)
            elif len(x.shape) == 3:
                # HWC -> CHW -> 1CHW
                x = x.transpose(2, 0, 1)
                x = np.expand_dims(x, 0)
            elif x.ndim == 4:
                x = x.transpose(0, 3, 1, 2)  # BHWC -> BCHW
            x = torch.from_numpy(x).float()
        elif isinstance(x, torch.Tensor):
            if x.ndim == 3:
                x = x.unsqueeze(0)
        x = x.to(self.get_device())
        return self.layers(x)  # always expects (B, C, H, W)

    def output(self):
        self.eval()

        with torch.no_grad():
            x = torch.zeros(
                (1, self.in_channels, self.img_size[1], self.img_size[0]),
                device=self.get_device()
            )

            out = self(x)

        return out

    def visualize(self, input_image, max_channels=8, show=True):
        self.eval()
        device = self.get_device()

        if isinstance(input_image, np.ndarray):
            x = torch.from_numpy(input_image).permute(2, 0, 1).float().unsqueeze(0).to(device)  # HWC -> CHW -> BCHW
        elif isinstance(input_image, torch.Tensor):
            x = input_image.unsqueeze(0).to(device) if input_image.ndim == 3 else input_image.to(device)
        else:
            raise TypeError("input_image must be np.ndarray or torch.Tensor")

        conv_layers = [(name, module) for name, module in self.named_modules() if isinstance(module, nn.Conv2d)]
        all_layer_images = []

        for name, layer in conv_layers:
            activations = []

            def hook_fn(module, input, output):
                activations.append(output.cpu().detach())

            handle = layer.register_forward_hook(hook_fn)
            _ = self(x)
            handle.remove()

            act = activations[0][0]
            num_channels = min(act.shape[0], max_channels)

            fig, axes = plt.subplots(1, num_channels, figsize=(3 * num_channels, 3))
            if num_channels == 1:
                axes = [axes]

            for i in range(num_channels):
                axes[i].imshow(act[i], cmap='gray')
                axes[i].axis('off')

            fig.suptitle(f'Layer: {name}', fontsize=14)
            if show:
                plt.show()

            buf = io.BytesIO()
            fig.savefig(buf, format='png')
            buf.seek(0)
            img = Image.open(buf).convert("RGB")
            all_layer_images.append(np.array(img))
            plt.close(fig)
        return all_layer_images

class ClassicalFeatureExtractor(nn.Module):
    def __init__(self, config: Config):
        super().__init__()
        self.img_size = config.img_size  # (W, H)
        self.hog_orientations = config.hog_orientations
        self.num_downsample = config.classical_downsample
        self.config = config
        self.feature_names = ['HoG', 'Canny Edge', 'Harris Corner', 'Shi-Tomasi Corners', 'LBP', 'Gabor Filters']
        self.device = 'cpu'

    def get_device(self):
        return next(self.parameters()).device if len(list(self.parameters())) > 0 else self.device

    def extract_features(self, img):
        cfg = self.config

        # Convert to grayscale, then downsample, but never below one HOG block.
        min_h = cfg.hog_pixels_per_cell[0] * cfg.hog_cells_per_block[0]
        min_w = cfg.hog_pixels_per_cell[1] * cfg.hog_cells_per_block[1]
        gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)

        for _ in range(self.num_downsample):
            h, w = gray.shape
            if h <= min_h or w <= min_w:
                break
            gray = cv2.pyrDown(gray)

        gray = cv2.GaussianBlur(gray, cfg.gaussian_ksize, sigmaX=cfg.gaussian_sigmaX, sigmaY=cfg.gaussian_sigmaY)

        feature_list = []

        # 1. HOG
        _, hog_image = hog(
            gray,
            orientations=cfg.hog_orientations,
            pixels_per_cell=cfg.hog_pixels_per_cell,
            cells_per_block=cfg.hog_cells_per_block,
            block_norm=cfg.hog_block_norm,
            visualize=True
        )
        feature_list.append(hog_image)

        # 2. Canny edges
        edges = cv2.Canny(gray, cfg.canny_low, cfg.canny_high) / 255.0
        feature_list.append(edges)

        # 3. Harris corners
        harris = cv2.cornerHarris(gray, blockSize=cfg.harris_block_size, ksize=cfg.harris_ksize, k=cfg.harris_k)
        harris = cv2.dilate(harris, None)
        harris = np.clip(harris, 0, 1)
        feature_list.append(harris)

        # 4. Shi-Tomasi corners
        shi_corners = np.zeros_like(gray, dtype=np.float32)
        keypoints = cv2.goodFeaturesToTrack(gray, maxCorners=cfg.shi_max_corners, qualityLevel=cfg.shi_quality_level, minDistance=cfg.shi_min_distance)
        if keypoints is not None:
            for kp in keypoints:
                x, y = kp.ravel()
                shi_corners[int(y), int(x)] = 1.0
        feature_list.append(shi_corners)

        # 5. LBP
        lbp = local_binary_pattern(gray, P=cfg.lbp_P, R=cfg.lbp_R, method='uniform')
        lbp = lbp / lbp.max() if lbp.max() != 0 else lbp
        feature_list.append(lbp)

        # 6. Gabor filter
        g_kernel = cv2.getGaborKernel((cfg.gabor_ksize, cfg.gabor_ksize), cfg.gabor_sigma, cfg.gabor_theta, cfg.gabor_lambda, cfg.gabor_gamma)
        gabor_feat = cv2.filter2D(gray, cv2.CV_32F, g_kernel)
        gabor_feat = (gabor_feat - gabor_feat.min()) / (gabor_feat.max() - gabor_feat.min() + 1e-8)
        feature_list.append(gabor_feat)

        # Stack all features along the channel axis
        features = np.stack(feature_list, axis=2)
        return features.astype(np.float32)

    def forward(self, x):
        if isinstance(x, torch.Tensor):
            x = x.cpu().numpy()
        if isinstance(x, np.ndarray):
            if x.ndim == 3:
                x = np.expand_dims(x, 0)
            elif x.ndim != 4:
                raise ValueError(f"Expected input of shape HWC or BHWC, got {x.shape}")
        elif isinstance(x, list):
            x = np.stack(x, axis=0)

        batch_features = []
        for img in x:
            if img.ndim != 3 or img.shape[2] != 3:
                img = np.repeat(img[:, :, None], 3, axis=2)
            feat = self.extract_features(img)
            batch_features.append(feat)
        batch_features = np.stack(batch_features, axis=0)
        return torch.from_numpy(batch_features).float().to(self.get_device())

    def visualize(self, img, show_original=True, show=True):
        if img.ndim != 3 or img.shape[2] != 3:
            img = np.repeat(img[:, :, None], 3, axis=2)

        feature_stack = self.extract_features(img)
        num_channels = feature_stack.shape[2]

        outputs = []

        def fig_to_pil(fig):
            buf = io.BytesIO()
            fig.savefig(buf, format="png", dpi=150, bbox_inches="tight")
            buf.seek(0)

            pil_img = Image.open(buf).copy()

            buf.close()
            plt.close(fig)

            return pil_img

        if show_original:
            fig = plt.figure(figsize=(4, 4))
            plt.imshow(img)
            plt.title("Original")
            plt.axis("off")
            if show:
                plt.show()
            outputs.append(fig_to_pil(fig))

        for c in range(num_channels):
            fig = plt.figure(figsize=(4, 4))

            plt.imshow(feature_stack[:, :, c], cmap="gray")
            plt.title(f"Feature {self.feature_names[c]}")
            plt.axis("off")
            if show:
                plt.show()
            outputs.append(fig_to_pil(fig))

        return outputs


    def output(self):
        """Return a dummy output to compute in_features for the FC head."""
        dummy_img = np.zeros((1, self.img_size[1], self.img_size[0], 3), dtype=np.float32)
        feat = self.forward(dummy_img)
        return feat



class FullyConnectedHead(nn.Module):
    def __init__(self, in_features, classes, config: Config):
        super().__init__()
        num_classes = len(classes)
        self.classes = classes
        layers = []
        out_features = 256
        for i in range(config.fc_num_layers):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.BatchNorm1d(out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(config.dropout))
            in_features = out_features
            out_features = out_features // 2
            if out_features <= num_classes:
                break
        layers.append(nn.Linear(in_features, num_classes))
        self.layers = nn.Sequential(*layers)

    def get_device(self):
        return next(self.parameters()).device

    def forward(self, x: torch.Tensor):
        x = x.to(self.get_device())
        return self.layers(x)

class Classifier(nn.Module):
    def __init__(self, backbone, classes, config: Config):
        super().__init__()
        self.config = config
        self.classes = classes
        self.backbone = backbone
        self.flatten = nn.Flatten()
        # Run a dummy forward pass through the backbone to infer the
        # flattened feature size for the classifier head.
        feat = backbone.output()
        flat = self.flatten(feat)
        in_features = flat.shape[1]
        self.head = FullyConnectedHead(in_features, classes, config)

    def get_device(self):
        return next(self.parameters()).device

    @torch.no_grad()
    def predict(self, x):
        self.eval()
        target_size = self.config.img_size
        x = cv2.resize(x, target_size)
        logits = self.forward(x)
        probs = torch.softmax(logits, dim=1)
        pred_idx = torch.argmax(probs, dim=1).item()

        return self.classes[pred_idx]

    def forward(self, x):
        feat = self.backbone(x)
        feat = self.flatten(feat)
        return self.head(feat)

    def visualize_feature(self, img, return_img=True, **kwargs):
        target_size = self.config.img_size
        img = cv2.resize(img, target_size)
        if return_img:
            return self.backbone.visualize(img, **kwargs)
        else:
            self.backbone.visualize(img, **kwargs)

    def save(self, path: str):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save({
            'model_state_dict': self.state_dict(),
            'classes': self.classes,
            'config': self.config
        }, path)
        print(f"Model saved to {path}")

    @staticmethod
    def load(path: str, backbone_class, device='cpu'):
        checkpoint = torch.load(path, map_location=device, weights_only=False)
        config = checkpoint['config']
        classes = checkpoint['classes']
        backbone = backbone_class(config).to(device)
        model = Classifier(backbone, classes, config).to(device)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        print(f"Model loaded from {path}")
        return model
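Usage note: both backbones expose the same interface (forward, output, visualize), so Classifier can wrap either one. A minimal sketch on a random image (the class names are hypothetical; with the defaults here, classical_downsample=1 pyramids a 128x128 input down once to 64x64 before stacking the six feature channels):

import numpy as np
from src.model import Config, CNNFeatureExtractor, ClassicalFeatureExtractor, Classifier

config = Config()
config.img_size = (128, 128)

# Random RGB image in [0, 1], HWC layout, matching what ImageDataset produces.
img = np.random.rand(128, 128, 3).astype(np.float32)

classic = ClassicalFeatureExtractor(config)
feats = classic.extract_features(img)
print(feats.shape)  # (64, 64, 6): HOG, Canny, Harris, Shi-Tomasi, LBP, Gabor

model = Classifier(CNNFeatureExtractor(config), ['cat', 'dog'], config)
print(model.predict(img))  # arbitrary label, since this model is untrained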
src/trainer.py
ADDED
@@ -0,0 +1,203 @@
from src.model import Classifier
from src.dataloader import ImageDataset, collate_fn
from torch.utils.data import DataLoader
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import random
import numpy as np
import torch.nn as nn

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

class ModelTrainer:
    def __init__(self, model: Classifier, train_set: ImageDataset, val_set: ImageDataset = None, batch_size=32, lr=1e-3, device='cpu', return_fig=False, seed=None):
        g = torch.Generator()
        if seed is not None:
            g.manual_seed(seed)

        self.train_loader = DataLoader(
            train_set,
            batch_size,
            shuffle=True,
            collate_fn=collate_fn,
            worker_init_fn=seed_worker,
            generator=g
        )

        self.device = device

        if val_set is not None:
            self.val_loader = DataLoader(
                val_set,
                batch_size,
                shuffle=False,
                collate_fn=collate_fn,
                worker_init_fn=seed_worker
            )
        else:
            self.val_loader = None
        self.class_names = model.classes
        self.model = model
        self.lr = lr
        self.optim = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
        self.optim.zero_grad()
        self.criterion = nn.CrossEntropyLoss()
        self.return_fig = return_fig

    def visualize_batch(self, imgs, preds, labels, class_names=None, max_samples=4):
        first_image = imgs
        if isinstance(imgs, list):
            imgs = np.stack(imgs, axis=0)
            imgs = torch.from_numpy(imgs).permute(0, 3, 1, 2).float()

        imgs_np = imgs.cpu().numpy()
        preds = preds.cpu().numpy()
        labels = labels.cpu().numpy()

        batch_size = imgs_np.shape[0]
        indices = random.sample(range(batch_size), min(max_samples, batch_size))
        first_image = first_image[indices[0]]
        fig_pred = plt.figure(figsize=(6 * len(indices), 5))
        grid = fig_pred.add_gridspec(1, len(indices))

        for col, idx in enumerate(indices):
            ax = fig_pred.add_subplot(grid[0, col])
            ax.imshow(imgs_np[idx].transpose(1, 2, 0))

            if class_names:
                title = f"P: {class_names[preds[idx]]} | T: {class_names[labels[idx]]}"
            else:
                title = f"P: {preds[idx]} | T: {labels[idx]}"

            ax.set_title(title)
            ax.axis("off")

        fig_pred.tight_layout()
        raw_features = self.model.visualize_feature(first_image, show=False)
        feature_figs = []

        for f in raw_features:
            # The backbones return matplotlib figures, PIL images, or numpy arrays.
            if isinstance(f, plt.Figure):
                feature_figs.append(f)
                continue

            if hasattr(f, "mode"):  # PIL image
                f = np.array(f)
            h, w = f.shape[:2]

            dpi = 100
            fig_w = max(4, w / dpi)
            fig_h = max(4, h / dpi)
            fig = plt.figure(figsize=(fig_w, fig_h), dpi=dpi)
            ax = fig.add_subplot(111)
            ax.imshow(f)
            ax.axis("off")
            feature_figs.append(fig)


        all_figs = [fig_pred] + feature_figs
        if not self.return_fig:
            plt.show()
            plt.close(fig_pred)
            return None
        return all_figs


    def train_one_epoch(self):
        self.model.train()
        total_loss = 0
        train_pbar = tqdm(self.train_loader, desc="Training", leave=False)
        correct = 0
        total = 0
        for imgs, labels in train_pbar:
            labels = labels.to(self.device)

            # Forward
            outputs = self.model(imgs)
            loss = self.criterion(outputs, labels)

            # Backward
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            total_loss += loss.item()
            train_pbar.set_postfix(acc=correct/total, loss=loss.item())

        avg_loss = total_loss / len(self.train_loader)
        avg_acc = correct / total
        return avg_loss, avg_acc

    def train(self, epochs=10, visualize_every=5):
        train_losses = []
        train_accuracies = []
        val_losses = []
        val_accuracies = []
        for epoch in range(1, epochs + 1):
            train_loss, train_acc = self.train_one_epoch()
            train_losses.append(train_loss)
            train_accuracies.append(train_acc)
            if self.val_loader is not None:
                val_loss, val_acc, fig = self.validate(epoch, visualize=(epoch % visualize_every == 0 or epoch == 1))
                val_losses.append(val_loss)
                val_accuracies.append(val_acc)
                print(f"Epoch {epoch} Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
                yield train_loss, train_acc, val_loss, val_acc, fig
            else:
                print(f"Epoch {epoch} Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
                yield train_loss, train_acc, None, None, None
        # Final yield carries the accumulated history lists, not per-epoch scalars.
        yield train_losses, train_accuracies, val_losses, val_accuracies, None

    def validate(self, epoch, visualize=False):
        if self.val_loader is None:
            return

        self.model.eval()
        total_loss = 0
        correct = 0
        total = 0

        val_imgs_display = None
        val_preds_display = None
        val_labels_display = None

        val_pbar = tqdm(self.val_loader, desc="Validation", leave=False)
        fig = None
        with torch.no_grad():
            for imgs, labels in val_pbar:
                labels = labels.to(self.device)

                outputs = self.model(imgs)
                loss = self.criterion(outputs, labels)
                total_loss += loss.item()

                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

                if visualize and val_imgs_display is None:
                    val_imgs_display = imgs
                    val_preds_display = preds
                    val_labels_display = labels

                val_pbar.set_postfix(loss=loss.item(), acc=correct / total)

        avg_loss = total_loss / len(self.val_loader)
        acc = correct / total

        if visualize and val_imgs_display is not None:
            fig = self.visualize_batch(val_imgs_display, val_preds_display, val_labels_display, self.class_names)

        return avg_loss, acc, fig
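Usage note: a minimal end-to-end sketch of ModelTrainer (the 'dataset' path is hypothetical, and the head/tail split below stands in for the stratified split that app.py performs):

from torch.utils.data import Subset
from src.dataloader import ImageDataset
from src.model import Config, CNNFeatureExtractor, Classifier
from src.trainer import ModelTrainer

config = Config()
config.img_size = (128, 128)

data = ImageDataset('dataset', config.img_size)
cut = int(0.8 * len(data))
train_set = Subset(data, range(cut))
val_set = Subset(data, range(cut, len(data)))

model = Classifier(CNNFeatureExtractor(config), data.classes, config)
trainer = ModelTrainer(model, train_set, val_set, batch_size=16, lr=1e-3, device='cpu')

epochs = 3
# train() yields per-epoch scalars, then one final yield carrying the full
# history lists, which is why consumers such as app.py break once i == epochs.
for i, (train_loss, train_acc, val_loss, val_acc, figs) in enumerate(trainer.train(epochs, visualize_every=2)):
    if i == epochs:
        break
    print(f"epoch {i + 1}: train {train_loss:.3f}/{train_acc:.3f} | val {val_loss:.3f}/{val_acc:.3f}")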