import gradio as gr import zipfile import os import torch from src.dataloader import ImageDataset,collate_fn,AugmentedSubset,simple_augment from src.model import Classifier,Config,CNNFeatureExtractor,ClassicalFeatureExtractor,load from torch.utils.data import Subset from src.trainer import ModelTrainer,model_evaluation import torch import os import numpy as np import time import cv2 from PIL import Image import io import matplotlib.pyplot as plt import shutil import pandas as pd from sklearn.model_selection import train_test_split device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def unzip_dataset(zip_file): base_name = os.path.splitext(os.path.basename(zip_file.name))[0] dataset_path = os.path.join(".", base_name) os.makedirs(dataset_path, exist_ok=True) with zipfile.ZipFile(zip_file.name, 'r') as zip_ref: zip_ref.extractall(dataset_path) extracted_items = os.listdir(dataset_path) if len(extracted_items) == 1 and os.path.isdir(os.path.join(dataset_path, extracted_items[0])): dataset_path = os.path.join(dataset_path, extracted_items[0]) print(f"Dataset extracted to: {dataset_path}") class_names = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))] print(f"Detected classes: {class_names}") for cls in class_names: cls_path = os.path.join(dataset_path, cls) images = os.listdir(cls_path) print(f"Class '{cls}' has {len(images)} images. 
Sample: {images[:3]}") return dataset_path cnn_history={ "train_acc":[], "train_loss":[], "val_acc":[], "val_loss":[] } classic_history={ "train_acc":[], "train_loss":[], "val_acc":[], "val_loss":[] } training_interrupt = False def fig_to_image(fig): buf = io.BytesIO() fig.savefig(buf, format="png") buf.seek(0) img = Image.open(buf).convert("RGB") img_array = np.array(img) plt.close(fig) return img_array def plot(datas, labels, xlabel, ylabel, title, figsize=(16, 8)): fig, ax = plt.subplots(figsize=figsize) for data, label in zip(datas, labels): ax.plot(range(1, len(data) + 1), data, label=label) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_title(title) ax.legend() return fig_to_image(fig) class TrainingInterrupted(Exception): pass cnntrainer=None classictrainer=None def stop_training(): global training_interrupt training_interrupt = True if cnntrainer is not None: cnntrainer.interrupt=True if classictrainer is not None: classictrainer.interrupt=True return "Training stopped." def train(cnn,classic,train_set,val_set,batch_size,lr,epochs,device="cpu",visualize_every=5): global training_interrupt global cnntrainer,classictrainer training_interrupt = False global cnn_history global classic_history cnn_done=False cnn_history={ "train_acc":[], "train_loss":[], "val_acc":[], "val_loss":[] } classic_history={ "train_acc":[], "train_loss":[], "val_acc":[], "val_loss":[] } try: if training_interrupt: raise TrainingInterrupted("Training was interrupted!") cnntrainer = ModelTrainer(cnn,train_set,val_set,batch_size,lr,device=device,return_fig=True) classictrainer = ModelTrainer(classic,train_set,val_set,batch_size,lr,device=device,return_fig=True) cnn_text="" classic_text="" cnn_fig=None all_cnn_fig = [] all_classic_fig= [] classic_fig=None start_time = time.time() for i,(cnn_train_loss,cnn_train_acc,cnn_val_loss,cnn_val_acc,cnn_fig) in enumerate(cnntrainer.train(epochs,visualize_every=visualize_every)): if training_interrupt: raise TrainingInterrupted("Training was 
interrupted!") if i == epochs: break if cnn_fig is not None: for fig in cnn_fig: fig.suptitle(f"Epoch {i+1}", fontsize=16) all_cnn_fig.append(fig_to_image(fig)) cnn_text+= f"Epochs {i+1} : Train Loss: {cnn_train_loss:.4f}, Train Acc: {cnn_train_acc:.4f}, Val Loss: {cnn_val_loss:.4f}, Val Acc: {cnn_val_acc:.4f}\n" cnn_history['train_acc'].append(cnn_train_acc) cnn_history['train_loss'].append(cnn_train_loss) cnn_history['val_acc'].append(cnn_val_acc) cnn_history['val_loss'].append(cnn_val_loss) yield cnn_text,all_cnn_fig,classic_text,all_classic_fig,cnn_done cnn_done=True dt = time.time()-start_time cnn_fig=None cnn_text+=f'Time taken : {dt:.2f} seconds\n' yield cnn_text,all_cnn_fig,classic_text,all_classic_fig,cnn_done start_time = time.time() for i,(classic_train_loss,classic_train_acc,classic_val_loss,classic_val_acc,classic_fig) in enumerate(classictrainer.train(epochs,visualize_every=visualize_every)): if training_interrupt: raise TrainingInterrupted("Training was interrupted!") if i == epochs: break if classic_fig is not None: for fig in classic_fig: fig.suptitle(f"Epoch {i+1}", fontsize=16) all_classic_fig.append(fig_to_image(fig)) classic_history['train_acc'].append(classic_train_acc) classic_history['train_loss'].append(classic_train_loss) classic_history['val_acc'].append(classic_val_acc) classic_history['val_loss'].append(classic_val_loss) classic_text+= f"Epochs {i+1} : Train Loss: {classic_train_loss:.4f}, Train Acc: {classic_train_acc:.4f}, Val Loss: {classic_val_loss:.4f}, Val Acc: {classic_val_acc:.4f}\n" yield cnn_text,all_cnn_fig,classic_text,all_classic_fig,cnn_done dt = time.time()-start_time classic_fig=None classic_text+=f'Time taken : {dt:.2f} seconds\n' yield cnn_text,all_cnn_fig,classic_text,all_classic_fig,cnn_done except TrainingInterrupted as e: print(e) return def train_model(zip_file,batch_size,lr,epochs,seed,vis_every,use_augment, img_width,img_height,fc_num_layers, in_channels,conv_hidden_dim,dropout, classical_downsample, # 
hog_orientations,hog_pixels_per_cell,hog_cells_per_block,hog_block_norm, canny_sigma,canny_low,canny_high, gaussian_ksize,gaussian_sigmaX,gaussian_sigmaY, harris_block_size,harris_ksize,harris_k, lbp_P,lbp_R, gabor_ksize,gabor_sigma,gabor_theta,gabor_lambda,gabor_gamma, sobel_ksize): config = Config() global training_interrupt training_interrupt = False BATCH_SIZE = batch_size DATASET_PATH = unzip_dataset(zip_file) SEED = seed EPOCHS = epochs LR = lr config.img_size = (int(img_width),int(img_height)) config.fc_num_layers = int(fc_num_layers) # CNN Config config.in_channels = int(in_channels) config.conv_hidden_dim=int(conv_hidden_dim) config.dropout=dropout # Classical Config config.classical_downsample=int(classical_downsample) # config.hog_orientations=int(hog_orientations) # config.hog_pixels_per_cell=(int(hog_pixels_per_cell),int(hog_pixels_per_cell)) # config.hog_cells_per_block=(int(hog_cells_per_block),int(hog_cells_per_block)) # config.hog_block_norm=hog_block_norm config.canny_sigma=int(canny_sigma) config.canny_low=canny_low config.canny_high=canny_high config.gaussian_ksize=(int(gaussian_ksize),int(gaussian_ksize)) config.gaussian_sigmaX=gaussian_sigmaX config.gaussian_sigmaY=gaussian_sigmaY config.harris_block_size=int(harris_block_size) config.harris_ksize=int(harris_ksize) config.harris_k=harris_k config.lbp_P=int(lbp_P) config.lbp_R=int(lbp_R) config.gabor_ksize=int(gabor_ksize) config.gabor_sigma=int(gabor_sigma) config.gabor_theta=int(gabor_theta) config.gabor_lambda=int(gabor_lambda) config.gabor_gamma=gabor_gamma config.sobel_ksize=int(sobel_ksize) cnn_history_plots=[] classical_history_plots=[] cnn_plotted=False try: dataset = ImageDataset(DATASET_PATH,config.img_size) labels = [item['id'] for item in dataset.data] classes_name = dataset.classes train_idx, validation_idx = train_test_split(np.arange(len(dataset)), test_size=0.2, random_state=SEED, shuffle=True, stratify=labels) train_dataset = Subset(dataset, train_idx) val_dataset = 
Subset(dataset, validation_idx) if use_augment: train_dataset = AugmentedSubset(train_dataset,simple_augment) val_dataset = AugmentedSubset(val_dataset,None) else: train_dataset = AugmentedSubset(train_dataset,None) val_dataset = AugmentedSubset(val_dataset,None) cnnbackbone = CNNFeatureExtractor(config).to(device) cnnmodel = Classifier(cnnbackbone,train_dataset.dataset.classes,config).to(device) classicbackbone = ClassicalFeatureExtractor(config) classicmodel = Classifier(classicbackbone,train_dataset.dataset.classes,config).to(device) for cnn_text,cnn_fig,classic_text,classic_fig,cnn_done in train(cnnmodel,classicmodel,train_dataset,val_dataset,BATCH_SIZE,LR,EPOCHS,device,visualize_every=vis_every): if cnn_done and not cnn_plotted: cnn_plotted=True cnn_history_plots.append(plot([cnn_history['train_acc'],cnn_history['val_acc']],['Training Accuracy','Validation Accuracy'],'Epochs','Accuracy (%)','Training vs Validation Accuracy')) cnn_history_plots.append(plot([cnn_history['train_loss'],cnn_history['val_loss']],['Training Loss','Validation Loss'],'Epochs','Loss','Training vs Validation Loss')) cm,cr,roc = model_evaluation(cnnmodel,val_dataset,device,BATCH_SIZE,0,classes_name) cnn_history_plots.append(fig_to_image(cm)) cnn_history_plots.append(fig_to_image(cr)) cnn_history_plots.append(fig_to_image(roc)) yield cnn_text,cnn_fig,classic_text,classic_fig,cnn_history_plots,classical_history_plots classical_history_plots.append(plot([classic_history['train_acc'],classic_history['val_acc']],['Training Accuracy','Validation Accuracy'],'Epochs','Accuracy (%)','Training vs Validation Accuracy')) classical_history_plots.append(plot([classic_history['train_loss'],classic_history['val_loss']],['Training Loss','Validation Loss'],'Epochs','Loss','Training vs Validation Loss')) cm,cr,roc = model_evaluation(classicmodel,val_dataset,device,BATCH_SIZE,0,classes_name) classical_history_plots.append(fig_to_image(cm)) classical_history_plots.append(fig_to_image(cr)) 
classical_history_plots.append(fig_to_image(roc)) yield cnn_text,cnn_fig,classic_text,classic_fig,cnn_history_plots,classical_history_plots except RuntimeError as e: print(e) yield cnn_text,cnn_fig,classic_text,classic_fig,cnn_history_plots,classical_history_plots return finally: if os.path.exists(DATASET_PATH): shutil.rmtree(DATASET_PATH) print(f"Temporary dataset folder '{DATASET_PATH}' removed.") cnnmodel.save(os.path.join('trained_model','cnn_model.pt')) classicmodel.save(os.path.join('trained_model','classic_model.pt')) intro_html = """
Object Classification is a field of computer vision where we train a computer to classify or identify what an object is. In traditional Object Classification, the task usually consists of feature extraction followed by a classification model. For feature extraction, there are several methods that extract features using a specific algorithm. These include algorithms such as Corner Detection, Edge Detection, Local Binary Pattern (LBP), or even Histogram of Oriented Gradients (HoG). There are many other means of feature extraction. After feature extraction, the features are passed to a machine learning algorithm, specifically a classifier model. Examples of such models are the SVM, k-Nearest Neighbors, or Naive Bayes, which learn to distinguish object categories based on said features.
With the advancement of deep learning, the object classification task has been significantly simplified. With deep learning, we barely use feature extraction algorithms anymore. The reason is not that feature extraction has become obsolete in deep learning; instead, the process itself has become part of the learning process. With deep learning, we use a model called a Convolutional Neural Network (CNN). A convolutional network consists of two main kinds of layers: the convolution layer and the fully connected layer. The convolution layer applies a filter, usually called a convolutional kernel, to the image; the value of each cell in the convolutional kernel is initially random.
For more detail on how convolutional neural networks work, you can refer to this link.
In reality, what this convolution operation does is extract features to be processed by a machine learning model or another deep learning model. The convolution by itself does not directly produce an object classification. So even a deep learning model such as a CNN still follows the traditional pipeline of feature extraction followed by classification. However, the strength of this model is that the convolution layer learns which weights it needs to use to get the best features possible. A single convolution layer can usually produce tens or hundreds of feature channels.
In this program, we will not discuss in depth what traditional feature extraction is, nor the full inner workings of a CNN; instead, we will have a playground to demonstrate how both kinds of feature extraction perform and how they differ from one another.
The model architecture used in this program follows a CNN architecture consisting of a convolution layer and a fully connected layer as the classifier. However, we make the feature extraction layer (the convolution layer) replaceable with a traditional feature extraction algorithm. This is done because, in theory, a traditional feature extractor does essentially what a convolution layer does, so it should perform just as well or only a little worse; the difference is that a convolution layer can extract many more features, and those features are trainable and task-specific.
For more detail, you can refer to https://github.com/VJyzCELERY/ClassicalObjectClassifier, which includes a paper explaining the code and its method.