import joblib
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler


class k_means_algo():
    """Helpers for experimenting with KMeans on the users dataframe.

    ``data_preparing`` stores the combined table on ``self.result_data``;
    the remaining methods operate on whatever data the caller passes in.
    """

    # Placeholder profile appended by ``data_preparing``: one value for every
    # column except the leading "id", which is computed from the CSV.
    _SAMPLE_PERSON = [
        "John", "Doe", 30, "john.doe@example.com", "Morning", "Night",
        "Bachelor", "Yes", "Yes", "Yes", "No", "Yes",
    ]

    def data_preparing(self, person, csv_path='./MLSystem/data/users_dataframe.csv'):
        """Load the users CSV, append the sample person and index by "id".

        The result is stored on ``self.result_data``; nothing is returned.

        Args:
            person: Column names for the appended row. The first column
                receives the new id, the rest receive ``_SAMPLE_PERSON``,
                so 13 names are required.
            csv_path: Location of the users CSV. It must contain an "id"
                column and at least one row.
        """
        # Debug output: the column layout the new row will use.
        print(pd.DataFrame(columns=person).columns)

        original_dataframe = pd.read_csv(csv_path)
        print(original_dataframe.columns)

        # The new id continues from the id of the last row in the file.
        next_id = original_dataframe["id"].iloc[-1] + 1
        new_row_dataframe = pd.DataFrame(
            [[next_id, *self._SAMPLE_PERSON]], columns=person
        )

        # NOTE: after set_index the ids live in the index; "id" is no longer
        # available as a column of ``self.result_data``.
        self.result_data = pd.concat(
            [original_dataframe, new_row_dataframe]
        ).set_index("id")

    def data_checking(self, dataframe):
        """Print, for every column, whether it contains missing values."""
        for col in dataframe.columns:
            if dataframe[col].isnull().sum() > 0:
                print(f"Missing values in {col} column")
            else:
                print(f"No missing values in column {col}")

    def reshape_playground(self, data):
        """Print the shape of ``data`` before and after a trial reshape.

        Presumably ``data`` is a NumPy array with more than 50 rows of 17
        values each -- TODO confirm against the caller.
        """
        print(f"Data shape {data.shape}")
        # The reshaped value is discarded, so this only checks that entry 50
        # can be viewed as 17x1; ``data`` itself is left untouched and the
        # shape printed below is the same as the one printed above.
        data[50].reshape(17, 1)
        print(data.shape)

    def forward_algorithm(self, dataframe, cluster_spec):
        """Cluster ``dataframe`` into 4 groups and print the labels.

        Args:
            dataframe: Feature matrix passed straight to
                ``KMeans.fit_predict``.
            cluster_spec: Currently unused; kept so existing calls keep
                working.

        Returns:
            The cluster label assigned to each row.
        """
        kmeans = KMeans(n_clusters=4, random_state=42)
        labels = kmeans.fit_predict(dataframe)
        print(labels[1])
        print(labels)
        return labels

    def set_specific_cluster(self, dataframe, cluster_spec):
        """Print the Euclidean distance from one entry to every entry.

        Args:
            dataframe: Collection whose iteration yields numeric vectors and
                which supports ``dataframe[cluster_spec]`` (presumably a 2-D
                NumPy array -- TODO confirm).
            cluster_spec: Index of the reference entry.

        Returns:
            The list of distances, in iteration order.
        """
        reference = dataframe[cluster_spec]
        distances = [euclidean(reference, point) for point in dataframe]
        print(distances)
        return distances

    def specific_cluster_kmeans(self, dataframe=None,
                                model_path='MLSystem/kmeans_model.pkl'):
        """Fit a one-cluster KMeans seeded at the last row and save it.

        Args:
            dataframe: Numeric feature matrix, one row per person. When
                omitted, ``self.result_data`` is used; that only works if
                all of its columns are numeric.
            model_path: File the fitted model is written to with
                ``joblib.dump``.

        Returns:
            The fitted ``KMeans`` instance.

        Raises:
            AttributeError: ``dataframe`` is omitted and ``data_preparing``
                has not been called yet.
            ValueError: The features are not a non-empty 2-D numeric table.
        """
        if dataframe is None:
            dataframe = self.result_data
        print(dataframe)

        # Converting up front turns non-numeric columns into an immediate
        # ValueError instead of a failure deep inside the fit.
        features = np.asarray(dataframe, dtype=float)
        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError("expected a non-empty 2-D feature matrix")

        # The most recently appended person is the last row. The id is the
        # index of ``self.result_data`` (not a column), so the row is taken
        # by position; reshape(1, -1) gives the (n_clusters, n_features)
        # layout required for an explicit ``init``.
        cluster_spec = features[-1].reshape(1, -1)
        print(cluster_spec.shape)

        kmeans = KMeans(n_clusters=1, init=cluster_spec, n_init=1, random_state=42)
        kmeans.fit(features)
        joblib.dump(kmeans, model_path)
        return kmeans


# Script section: runs on import as well as on direct execution.
my_kmeans = k_means_algo()
person = ["id", "Names", "Surnames", "Age", "Email", "Worktimes", "Schedules",
          "Studies level", "Pets", "Cooking", "Sport", "Smoking", "Organized"]
my_kmeans.data_preparing(person)
#forward_algorithm(dataframe,50)
#set_specific_cluster(dataframe,50)
#my_kmeans.specific_cluster_kmeans()