import ast

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from keras.layers import BatchNormalization
import json
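
# label_map parses the string-encoded category list stored in train.csv and
# turns it into a 290-dimensional multi-hot label vector (category ids are 1-indexed).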
def label_map(category, n_classes=290):
    category = ast.literal_eval(category)
    labels = [0]*n_classes
    for category_id in category:
        labels[int(category_id)-1] = 1
    return labels
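
# Illustrative example (assuming the categories column stores a string such as "[1, 9]"):
#   label_map("[1, 9]") -> a 290-element list with 1s at indices 0 and 8, 0s elsewhere.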

if __name__ == "__main__":

    ## load data

    image_dir = "images/"

    train_df = pd.read_csv("multilabel_classification/train.csv")
    train_df['categories'] = train_df['categories'].apply(label_map)

    file_name = []
    for idx in range(len(train_df)):
        file_name.append(image_dir + train_df["id"][idx] + ".png")
    train_df["file_name"] = file_name

    X_dataset = []
    SIZE = 256

    for i in range(len(train_df)):
        img = keras.utils.load_img(train_df["file_name"][i], target_size=(SIZE, SIZE, 3))
        img = keras.utils.img_to_array(img)
        img = img/255.
        X_dataset.append(img)

    X = np.array(X_dataset)
    y = np.array(train_df["categories"].to_list())

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20, test_size=0.3)
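
    # At this point X has shape (n_images, 256, 256, 3) with pixel values in [0, 1]
    # and y has shape (n_images, 290); 30% of the rows are held out for validation.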

    # define model

    model = Sequential()

    model.add(Conv2D(filters=16, kernel_size=(5, 5), activation="relu", input_shape=(SIZE, SIZE, 3)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(290, activation='sigmoid'))
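
    # The final Dense(290, activation='sigmoid') layer outputs an independent
    # probability per category, so several categories can be predicted for the
    # same image (a softmax would instead force the classes to compete).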

    # Train model

    EPOCH = 1
    BATCH_SIZE = 64

    # Binary cross entropy of each label: this is not really a binary classification
    # problem, but binary cross entropy is calculated independently for each label.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    history = model.fit(X_train, y_train, epochs=EPOCH, validation_data=(X_test, y_test), batch_size=BATCH_SIZE)
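
    # Optional sketch (not part of the original pipeline): the imported matplotlib
    # could be used to inspect the recorded loss curves, e.g.
    #   plt.plot(history.history["loss"], label="train loss")
    #   plt.plot(history.history["val_loss"], label="val loss")
    #   plt.legend()
    #   plt.savefig("loss_curves.png")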

    ## generate predictions on test set and save to submission.csv

    valid_json = json.load(open("object_detection/eval.json"))["images"]
    valid_df = pd.DataFrame(valid_json)

    classes = np.array(pd.read_csv("category_key.csv")["name"].to_list())  # array of all class names (read once, outside the loop)

    predict_list = []
    for i in range(len(valid_df)):

        img = keras.utils.load_img(image_dir + valid_df['file_name'][i], target_size=(SIZE, SIZE, 3))
        img = keras.utils.img_to_array(img)
        img = img/255.
        img = np.expand_dims(img, axis=0)

        proba = model.predict(img)  # probabilities for each class
        sorted_categories = np.argsort(proba[0])[:-11:-1]  # indices of the 10 highest-probability classes

        threshold = 0.5
        predict = []
        proba = proba[0]
        for j in range(len(proba)):  # use j so the outer loop index i is not shadowed
            if proba[j] >= threshold:
                predict.append(j+1)  # recover the true (1-indexed) id of the class
        predict.sort()
        predict_list.append(predict)

    valid_id = [x[:-4] for x in valid_df["file_name"].to_list()]
    valid_osd = [1]*len(valid_id)

    submit_data = [[valid_id[i], predict_list[i], valid_osd[i]] for i in range(len(valid_id))]
    pd.DataFrame(data=submit_data, columns=["id", "categories", "osd"]).to_csv("submission.csv", index=False)
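
    # submission.csv has one row per eval image: id (file name without its 4-character
    # extension, e.g. ".png"), categories (sorted list of predicted category ids that
    # cleared the 0.5 threshold), and osd (out-of-sample flag, hard-coded to 1 here).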