| | from sklearn.decomposition import PCA, IncrementalPCA |
| | from sklearn.decomposition import TruncatedSVD |
| | import numpy as np |
| | import pickle |
| | import os |
| | from tqdm import tqdm |
| | from numpy import save, load |
| | import math |
| | from PIL import Image |
| | from numpy import save, load |
| |
|
| |
|
class PCAUtility:
    """Fit a PCA model on label vectors and persist, reload and apply it.

    The eigenvalues, eigenvectors and mean vector are stored as three ``.npy``
    files under ``pca_obj_dir``; each file name is built from the dataset
    name, one of the ``*_prefix`` infixes below, and the PCA percentage.
    """

    # Infixes placed between the dataset name and the percentage in file names.
    eigenvalues_prefix = "_eigenvalues_"
    eigenvectors_prefix = "_eigenvectors_"
    meanvector_prefix = "_meanvector_"
    # Directory (relative to the current working directory) holding the arrays.
    pca_obj_dir = './pca_obj/'

    def _pca_obj_path(self, dataset_name, prefix, pca_percentages):
        """Return the path of one stored PCA array, including the ``.npy`` suffix.

        ``numpy.save`` appends ``.npy`` to a string path that lacks it while
        ``numpy.load`` opens the path exactly as given, so the suffix is spelled
        out here and the same path is used for both saving and loading.
        """
        return self.pca_obj_dir + dataset_name + prefix + str(pca_percentages) + '.npy'

    def create_pca_from_npy(self, dataset_name, labels_npy_path, pca_percentages):
        """
        Generate and save the eigenvalues, eigenvectors and mean vector.

        :param dataset_name: name used as the leading part of the output file names.
        :param labels_npy_path: directory containing the normalized labels that
            are saved in npy format (one array per ``.npy`` file).
        :param pca_percentages: % of the variance the kept components must explain.
        :return: None; the three arrays are written under ``pca_obj_dir``.
        :raises ValueError: if the directory contains no ``.npy`` file.
        """
        print('PCA calculation started: loading labels')

        # Sorted so the sample order does not depend on the file system.
        npy_names = sorted(
            name for name in os.listdir(labels_npy_path) if name.endswith(".npy")
        )
        if not npy_names:
            raise ValueError("no .npy label files found in " + str(labels_npy_path))

        lbl_arr = np.array(
            [np.load(os.path.join(labels_npy_path, name)) for name in tqdm(npy_names)]
        )

        _, eigenvalues, eigenvectors = self._func_PCA(lbl_arr, pca_percentages)
        mean_lbl_arr = np.mean(lbl_arr, axis=0)

        # np.save does not create missing directories.
        os.makedirs(self.pca_obj_dir, exist_ok=True)
        np.save(self._pca_obj_path(dataset_name, self.eigenvalues_prefix, pca_percentages),
                eigenvalues)
        # Stored transposed, i.e. one principal axis per column.
        np.save(self._pca_obj_path(dataset_name, self.eigenvectors_prefix, pca_percentages),
                eigenvectors.T)
        np.save(self._pca_obj_path(dataset_name, self.meanvector_prefix, pca_percentages),
                mean_lbl_arr)

    def load_pca_obj(self, dataset_name, pca_percentages):
        """
        Load the arrays written by ``create_pca_from_npy``.

        :param dataset_name: dataset name used when the arrays were saved.
        :param pca_percentages: percentage used when the arrays were saved.
        :return: tuple ``(eigenvalues, eigenvectors, meanvector)``.
        """
        eigenvalues = np.load(
            self._pca_obj_path(dataset_name, self.eigenvalues_prefix, pca_percentages))
        eigenvectors = np.load(
            self._pca_obj_path(dataset_name, self.eigenvectors_prefix, pca_percentages))
        meanvector = np.load(
            self._pca_obj_path(dataset_name, self.meanvector_prefix, pca_percentages))
        return eigenvalues, eigenvectors, meanvector

    def calculate_b_vector(self, predicted_vector, correction, eigenvalues, eigenvectors, meanvector):
        """
        Project a vector onto the eigenvectors, optionally clamping the result.

        :param predicted_vector: vector to project.
        :param correction: when truthy, clamp coefficient ``i`` to
            ``[-3 * sqrt(eigenvalues[i]), 3 * sqrt(eigenvalues[i])]``.
        :param eigenvalues: one eigenvalue per coefficient.
        :param eigenvectors: matrix whose transpose is multiplied with the
            mean-centred vector (one eigenvector per column).
        :param meanvector: vector subtracted from ``predicted_vector``.
        :return: the (optionally clamped) coefficient vector.
        """
        centered = predicted_vector - meanvector
        b_vector = np.dot(eigenvectors.T, centered)

        if correction:
            count = b_vector.shape[0]
            limits = 3 * np.sqrt(np.asarray(eigenvalues, dtype=float)[:count])
            # Align the limits with axis 0 so column-shaped inputs are clamped
            # per coefficient instead of being broadcast into a square matrix.
            limits = limits.reshape((-1,) + (1,) * (b_vector.ndim - 1))
            b_vector = np.clip(b_vector, -limits, limits)

        return b_vector

    def _func_PCA(self, input_data, pca_postfix):
        """
        Fit a PCA on ``input_data`` and return its projection and parameters.

        :param input_data: samples, one per row.
        :param pca_postfix: percentage of variance to keep.
        :return: tuple ``(transformed_data, eigenvalues, eigenvectors)`` taken
            from ``explained_variance_`` and ``components_``.
        """
        input_data = np.array(input_data)
        fraction = pca_postfix / 100
        # scikit-learn only accepts a float n_components strictly between 0 and
        # 1; "keep everything" (>= 100 %) is expressed as None instead.
        n_components = None if fraction >= 1 else fraction
        pca = PCA(n_components=n_components)
        pca_input_data = pca.fit_transform(input_data)
        return pca_input_data, pca.explained_variance_, pca.components_
| |
|