Spaces:
Build error
Build error
Commit ·
1c7de53
0
Parent(s):
- datamanager.py +91 -0
datamanager.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import math
|
| 4 |
+
import tarfile
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class DataManager:
    """Unpack a compressed speech dataset and split it into train/test folders.

    The archive path is stored at construction time. ``manage`` runs the
    whole pipeline: extract the archive, group the extracted file names per
    speaker, split every speaker's files 2/3 - 1/3, and move them into the
    ``TrainingData`` / ``TestingData`` folders of the current working
    directory.
    """

    def __init__(self, dataset_path):
        """Store the path of the compressed dataset archive.

        Args:
            dataset_path: Path to the gzip-compressed tar archive.
        """
        self.dataset_path = dataset_path

    def extract_dataset(self, compressed_dataset_file_name, dataset_directory):
        """Extract a gzip-compressed tar archive into ``dataset_directory``.

        Extraction is best effort: a failure is reported on stdout instead of
        being raised, so the caller has to cope with a missing or partially
        filled directory.

        Args:
            compressed_dataset_file_name: Path of the ``.tgz`` archive.
            dataset_directory: Folder the members are extracted into.
        """
        try:
            # The context manager closes the archive even when extraction
            # fails half-way (the previous explicit close() was skipped then).
            with tarfile.open(compressed_dataset_file_name, "r:gz") as tar:
                if hasattr(tarfile, "data_filter"):
                    # Refuse members that would escape the target folder
                    # (absolute paths, "..", links pointing outside).
                    tar.extractall(dataset_directory, filter="data")
                else:
                    # Older interpreters have no extraction filters.
                    tar.extractall(dataset_directory)
        except Exception as error:
            # Deliberately broad (corrupt archives surface as several
            # unrelated exception types) but, unlike a bare ``except``, this
            # lets KeyboardInterrupt / SystemExit propagate.
            print("Ecxception raised: No extraction was done ...", error)
        else:
            print("Files extraction was successfull ...")

    def make_folder(self, folder_path):
        """Create ``folder_path`` and report the outcome on stdout.

        A folder that already exists (or any other OS-level failure) is
        reported, not raised.

        Args:
            folder_path: Path of the single directory level to create.
        """
        try:
            os.mkdir(folder_path)
        except OSError:
            print("Ecxception raised: ", folder_path, "could not be created ...")
        else:
            print(folder_path, "was created ...")

    def move_files(self, src, dst, group):
        """Move every file named in ``group`` from ``src`` to ``dst``.

        Args:
            src: Directory currently holding the files.
            dst: Directory the files are moved to.
            group: Iterable of file names (not paths) to move.

        Raises:
            OSError: If one of the renames fails.
        """
        for fname in group:
            os.rename(os.path.join(src, fname), os.path.join(dst, fname))

    def get_fnames_from_dict(self, dataset_dict, f_or_m):
        """Split the files of each speaker of one gender into train/test lists.

        For every speaker the first two thirds of the files (rounded down) go
        to the training list and the remainder to the testing list.

        Args:
            dataset_dict: Mapping from speaker id (e.g. ``"f0001"``) to the
                list of that speaker's file names.
            f_or_m: Gender prefix of the speaker ids to select, ``"f"`` or
                ``"m"``.

        Returns:
            A ``(training_data, testing_data)`` tuple of file-name lists.
        """
        training_data, testing_data = [], []

        # Walk over every speaker present in the dictionary instead of a
        # hard-coded range(1, 5), which silently skipped speaker "0005".
        prefix = f_or_m + "000"
        speakers = sorted(key for key in dataset_dict if key.startswith(prefix))

        for speaker in speakers:
            fnames = dataset_dict[speaker]
            # Integer arithmetic: floor of two thirds of the file count.
            length_separator = len(fnames) * 2 // 3

            training_data += fnames[:length_separator]
            testing_data += fnames[length_separator:]

        return training_data, testing_data

    def manage(self):
        """Extract the dataset and distribute its files into train/test folders.

        Creates ``TrainingData`` and ``TestingData`` (each with ``females``
        and ``males`` sub-folders) in the current working directory and moves
        the extracted files there.
        """
        compressed_dataset_file_name = self.dataset_path

        # Strip the extension(s) from the file name only, so that dots in
        # the parent path (e.g. "./SLR45.tgz") do not truncate the result.
        parent, archive_name = os.path.split(compressed_dataset_file_name)
        dataset_directory = os.path.join(parent, archive_name.split(".")[0])

        # create a folder for the data
        try:
            os.mkdir(dataset_directory)
        except FileExistsError:
            # Re-running on an already extracted dataset is fine.
            pass
        except OSError as error:
            print("Ecxception raised: ", dataset_directory, "could not be created ...", error)

        # extract dataset
        self.extract_dataset(compressed_dataset_file_name, dataset_directory)

        dataset_dict = {"f0001": [], "f0002": [], "f0003": [], "f0004": [], "f0005": [],
                        "m0001": [], "m0002": [], "m0003": [], "m0004": [], "m0005": [], }

        # Group the file names per speaker. Sorting makes the split
        # reproducible (os.listdir returns names in arbitrary order), and
        # files whose prefix is not a known speaker id are ignored rather
        # than raising KeyError.
        for fname in sorted(os.listdir(dataset_directory)):
            speaker = fname.split('_')[0]
            if speaker in dataset_dict:
                dataset_dict[speaker].append(fname)

        # divide and group file names
        training_set, testing_set = {}, {}
        training_set["females"], testing_set["females"] = self.get_fnames_from_dict(dataset_dict, "f")
        training_set["males"], testing_set["males"] = self.get_fnames_from_dict(dataset_dict, "m")

        # make training and testing folders (parents first, then children)
        splits = (("TrainingData", training_set), ("TestingData", testing_set))
        genders = ("females", "males")
        for split_folder, _ in splits:
            self.make_folder(split_folder)
        for split_folder, _ in splits:
            for gender in genders:
                self.make_folder(split_folder + "/" + gender)

        # move files
        for split_folder, split_set in splits:
            for gender in genders:
                self.move_files(dataset_directory, split_folder + "/" + gender, split_set[gender])
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
if __name__ == "__main__":
    # Script entry point: process the "SLR45.tgz" archive, resolved
    # relative to the current working directory.
    DataManager("SLR45.tgz").manage()
|