"""Train and evaluate an SVM genre classifier on the phonk / not-phonk data.

Workflow:
    1. Load the CSV and hold out 20% of the rows as a test set.
    2. Impute the numeric training columns on their own (exploratory step).
    3. Build a ColumnTransformer that imputes + scales the numeric features
       and imputes + one-hot encodes the categorical features.
    4. Fit an SVC on the preprocessed training set and score it on the
       held-out test set.
    5. Estimate generalisation with 10-fold cross-validation, refitting the
       preprocessing inside every fold.
"""

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
# The experimental flag must be imported before IterativeImputer, otherwise
# the import of IterativeImputer fails.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.svm import SVC

# --- Load and split -------------------------------------------------------
df = pd.read_csv("Data_With_Phonks_and_Not_Phonks.csv")
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# --- Exploratory imputation of every numeric training column --------------
# Kept separate from the modelling pipeline below; `train_data_tr` is the
# imputed numeric frame with the original column names and row index.
imputer = IterativeImputer(initial_strategy="median", random_state=42)
training_data_num = train_data.select_dtypes(include=[np.number])
imputer.fit(training_data_num)
X = imputer.transform(training_data_num)
train_data_tr = pd.DataFrame(
    X,
    columns=training_data_num.columns,
    index=training_data_num.index,
)

# --- Preprocessing pipelines ----------------------------------------------
num_pipeline = Pipeline([
    ("imputer", IterativeImputer(initial_strategy="median", random_state=42)),
    ("scaler", StandardScaler()),
])

cat_pipeline = Pipeline([
    # Categories not seen during fit are mapped to NaN so that the imputer
    # below handles them like missing values instead of transform() raising.
    ("ordinal_encoder", OrdinalEncoder(handle_unknown="use_encoded_value",
                                       unknown_value=np.nan)),
    # IterativeImputer is regression-based and returns continuous values,
    # which the one-hot encoder would treat as brand-new categories. The
    # most frequent code is always a valid category.
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("cat_encoder", OneHotEncoder(sparse_output=False)),
])

num_attribs = ["danceability_%", "energy_%", "bpm", "speechiness_%", "acousticness_%",
               "instrumentalness_%", "liveness_%", "valence_%"]
cat_attribs = ["key", "mode"]

preprocess_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", cat_pipeline, cat_attribs),
])

# --- Train on the full training split -------------------------------------
X_train = preprocess_pipeline.fit_transform(train_data)
y_train = train_data["genre"]

svm_clf = SVC(random_state=42)
svm_clf.fit(X_train, y_train)

# --- Held-out evaluation --------------------------------------------------
# The test set only goes through transform(); nothing is refit on it.
X_test = preprocess_pipeline.transform(test_data)
y_test = test_data["genre"]
y_pred = svm_clf.predict(X_test)
test_accuracy = float(np.mean(y_pred == y_test.to_numpy()))

# --- Cross-validation -----------------------------------------------------
# Cross-validate the raw features through preprocessing + classifier as one
# estimator. Scoring the already-transformed X_train would leak statistics
# from each validation fold (imputation model, scaling, category set) into
# training. cross_val_score clones the estimator, so the fitted
# `preprocess_pipeline` above is left untouched.
cv_model = Pipeline([
    ("preprocess", preprocess_pipeline),
    ("svm", SVC(random_state=42)),
])
svm_scores = cross_val_score(
    cv_model,
    train_data[num_attribs + cat_attribs],
    y_train,
    cv=10,
)

print(f"10-fold CV accuracy: {svm_scores.mean():.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")