Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import zipfile | |
| import os | |
| import shutil | |
| import pandas as pd | |
| import numpy as np | |
| import cv2 | |
| from sklearn.model_selection import train_test_split | |
# Page header: the title followed by two explanatory blurbs. The blurbs are
# wrapped in backticks, so Markdown renders them as inline code; no colour
# styling is applied anywhere in this section.
st.title("Automation for Data Preparation")
for _intro_text in (
    "`- This app will automate data preparation to make it easier to handled by different ML libraries.`",
    " ",
    "`- Online datasets are usually in the common format. However, different ML libraries require different formats. This app will help you to convert the common format to the converted format.`",
):
    st.write(_intro_text)

# Horizontal rule between the introduction and the format comparison.
st.write("---")

# Markdown bullet lists illustrating the two directory layouts. Each entry is
# terminated with a newline when the list is assembled.
# NOTE(review): every nested entry carries a single leading space, so Markdown
# may render these as flat lists rather than trees - confirm the intended
# indentation against the deployed app.
_common_format_md = "".join(
    f"{_entry}\n"
    for _entry in (
        "- root",
        " - class1",
        " - image1.jpg",
        " - image2.jpg",
        " - image3.jpg",
        " - ...",
        " - class2",
        " - image1.jpg",
        " - image2.jpg",
        " - ...",
        " - class3",
        " - image1.jpg",
        " - image2.jpg",
        " - ...",
    )
)
_converted_format_md = "".join(
    f"{_entry}\n"
    for _entry in (
        "- root",
        " - train",
        " - class1",
        " - image1.jpg",
        " - image2.jpg",
        " - ...",
        " - class2",
        " - test",
        " - class1",
        " - image1.jpg",
        " - image2.jpg",
        " - ...",
        " - class2",
        " - image1.jpg",
    )
)

# Side-by-side comparison: input layout on the left, output layout on the right.
col1, col2 = st.columns(2)
with col1:
    st.write("**Common Format:**")
    st.write(_common_format_md)
with col2:
    st.write("**Converted Format:**")
    st.write(_converted_format_md)

# Horizontal rule closing the comparison section.
st.write("---")
# Upload widget for the zipped dataset. The instruction text is passed as the
# widget label instead of being written separately above an empty label:
# Streamlit's documentation advises against empty labels. The wording is also
# corrected ("upload", not "update") and uses the page's own term for the
# input layout ("common format").
input_zip_file = st.file_uploader(
    "Please upload a folder containing images in the common format as a zip file.",
    type=["zip"],
)

# Directory the uploaded archive is extracted into.
default_folder = 'input_folder'

# Module-level accumulators for image arrays (X) and class labels (y). They
# are re-created on every script run and stay defined for the code below.
X = []
y = []

# Two blank writes add vertical spacing before the action button.
st.write(" ")
st.write(" ")
# --- "Transform" action ------------------------------------------------------
# Unzips the upload, reads every class folder, splits the images 80/20 into
# train/test, writes them in the converted layout and offers the result as a
# zip download. The helpers are defined first so the button handler stays short.


def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML anchor that downloads *bin_file* through a data URI.

    Args:
        bin_file: Path of the file whose bytes are embedded in the link.
        file_label: Text displayed for the link.

    Returns:
        The ``<a>`` element as a string. The caller has to render it, for
        example with ``st.markdown(..., unsafe_allow_html=True)``.
    """
    import base64  # local import: not part of the file's import header

    with open(bin_file, 'rb') as f:
        data = f.read()
    # The URI declares a base64 payload, so the bytes must be encoded and
    # decoded to text; interpolating raw bytes would embed their repr instead.
    bin_str = base64.b64encode(data).decode('ascii')
    # The download attribute is a suggested file name, not a path.
    download_name = os.path.basename(bin_file)
    href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{download_name}">{file_label}</a>'
    return href


def _load_labeled_images(root, size=(224, 224)):
    """Read and resize all JPEG images below *root*, labelled by class folder.

    The traversal expects ``root/<dataset>/<class>/<image>``: each entry of
    *root* (except ``__MACOSX``) is treated as a dataset folder whose
    sub-folders are the classes.

    Args:
        root: Directory the archive was extracted into.
        size: ``(width, height)`` every image is resized to.

    Returns:
        A ``(images, labels)`` pair of equally long lists.
    """
    images, labels = [], []
    # Listings are sorted so the seeded split below does not depend on the
    # arbitrary order os.listdir returns.
    for dataset_name in sorted(os.listdir(root)):
        dataset_dir = os.path.join(root, dataset_name)
        # Skip macOS archive metadata and stray files at the top level.
        if dataset_name == '__MACOSX' or not os.path.isdir(dataset_dir):
            continue
        for class_name in sorted(os.listdir(dataset_dir)):
            class_dir = os.path.join(dataset_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            for file_name in sorted(os.listdir(class_dir)):
                if not file_name.lower().endswith(('.jpg', '.jpeg')):
                    continue
                img = cv2.imread(os.path.join(class_dir, file_name))
                # imread yields None for files it cannot decode; resizing
                # None would raise, so such files are skipped.
                if img is None:
                    continue
                images.append(cv2.resize(img, size))
                labels.append(class_name)
    return images, labels


def _write_split(output_root, split_name, images, labels):
    """Write *images* to ``output_root/split_name/<label>/<index>.jpg``."""
    for index, (img, label) in enumerate(zip(images, labels)):
        cv2.imwrite(os.path.join(output_root, split_name, label, str(index) + '.jpg'), img)


def _build_converted_archive(uploaded_zip, input_root, output_root='output'):
    """Convert the uploaded dataset and zip the result.

    Output layout::

        output_root/
            train/<class>/<index>.jpg
            test/<class>/<index>.jpg

    Args:
        uploaded_zip: File-like object holding the uploaded zip archive.
        input_root: Directory to extract the archive into.
        output_root: Directory the converted layout is written to.

    Returns:
        The archive file name reported by ``shutil.make_archive``.

    Raises:
        zipfile.BadZipFile: If the upload is not a valid zip archive.
        ValueError: If fewer than two readable images were found, or if
            ``train_test_split`` rejects the data.
    """
    # Start from empty directories so files from an earlier run cannot leak
    # into this dataset or into the produced archive.
    shutil.rmtree(input_root, ignore_errors=True)
    shutil.rmtree(output_root, ignore_errors=True)

    with zipfile.ZipFile(uploaded_zip, 'r') as zip_ref:
        zip_ref.extractall(input_root)

    images, labels = _load_labeled_images(input_root)
    # An 80/20 split needs at least one sample on each side.
    if len(images) < 2:
        raise ValueError(
            "At least two readable .jpg/.jpeg images are required; check that "
            "the archive follows the common format (root/class/image.jpg)."
        )

    X = np.array(images)
    y = np.array(labels)
    print(len(X))
    print(len(y))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=42)

    # Create every class folder in both splits, even if a class ends up with
    # no image in one of them.
    for class_name in np.unique(y):
        for split_name in ('train', 'test'):
            os.makedirs(os.path.join(output_root, split_name, class_name), exist_ok=True)

    _write_split(output_root, 'train', X_train, y_train)
    _write_split(output_root, 'test', X_test, y_test)

    return shutil.make_archive('output_folder', 'zip', output_root)


if st.button("Transform"):
    if input_zip_file is None:
        st.warning("Please upload a zip file before pressing Transform.")
    else:
        try:
            archive_path = _build_converted_archive(input_zip_file, default_folder)
        except zipfile.BadZipFile:
            st.error("The uploaded file is not a valid zip archive.")
        except ValueError as err:
            st.error(str(err))
        else:
            st.write("Transform the folder successfully.")
            # Render the link; building the HTML without displaying it would
            # leave the user with nothing to click.
            st.markdown(
                get_binary_file_downloader_html(archive_path, 'Zip File'),
                unsafe_allow_html=True,
            )
            st.write("Download the zip file successfully.")