Phonk_or_not / First_Version.py
TakeWhatsYours's picture
Upload First_Version.py
cb1ba6c verified
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset; the relative path is resolved against the working directory.
df = pd.read_csv("Data_With_Phonks_and_Not_Phonks.csv")

# Hold out 20% of the rows for testing. The fixed seed keeps the split
# reproducible from run to run.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)
import numpy as np
# IterativeImputer is experimental: the enabling import has to run before the
# class can be imported from sklearn.impute.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

# Stand-alone imputation pass over every numeric column of the training split.
training_data_num = train_data.select_dtypes(include=[np.number])

# The median is only the starting guess; the imputer then refines the fills.
imputer = IterativeImputer(random_state=42, initial_strategy="median")
imputer.fit(training_data_num)
X = imputer.transform(training_data_num)

# Bare expression: shows the fitted column names in a notebook cell and has
# no visible effect when the file is run as a script.
imputer.feature_names_in_

# transform() returns a plain array, so re-attach the original row and
# column labels.
train_data_tr = pd.DataFrame(
    data=X,
    index=training_data_num.index,
    columns=training_data_num.columns,
)
from sklearn.pipeline import Pipeline
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

# Numeric features: fill missing values (median as the initial guess), then
# standardise each column.
num_pipeline = Pipeline(
    steps=[
        ("imputer", IterativeImputer(initial_strategy="median")),
        ("scaler", StandardScaler()),
    ]
)
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

# Categorical features: fill missing values with the most frequent category,
# then one-hot encode.
#
# Why SimpleImputer rather than IterativeImputer over ordinal codes: the
# iterative imputer estimates missing entries with a regression model, so the
# filled-in "codes" are continuous numbers rather than valid category codes.
# OneHotEncoder would then emit extra indicator columns for those fractional
# values. Imputing the most frequent category keeps every value a real
# category, which also makes the ordinal-encoding step unnecessary.
#
# handle_unknown="ignore" encodes a category that was absent during fit as an
# all-zero row instead of raising when transforming held-out data.
cat_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("cat_encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
])
from sklearn.compose import ColumnTransformer

# Columns routed through the numeric pipeline.
num_attribs = [
    "danceability_%",
    "energy_%",
    "bpm",
    "speechiness_%",
    "acousticness_%",
    "instrumentalness_%",
    "liveness_%",
    "valence_%",
]

# Columns routed through the categorical pipeline.
cat_attribs = ["key", "mode"]

# Apply each sub-pipeline to its own column subset and concatenate the
# results, numeric block first.
preprocess_pipeline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attribs),
        ("cat", cat_pipeline, cat_attribs),
    ]
)
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

# Fit the preprocessing on the training split and build the feature matrix.
X_train = preprocess_pipeline.fit_transform(train_data)
X_train  # bare expression: only displays something in a notebook cell

# The "genre" column is the classification target.
y_train = train_data["genre"]

# fit() returns the estimator itself, so construction and fitting can be chained.
svm_clf = SVC(random_state=42).fit(X_train, y_train)

# Reuse the already-fitted preprocessing on the held-out split, then predict.
# NOTE(review): y_pred is not compared against test_data["genre"] in the
# visible code, so no test-set score is produced here.
X_test = preprocess_pipeline.transform(test_data)
y_pred = svm_clf.predict(X_test)

# 10-fold cross-validation on the training features.
# NOTE(review): X_train came from a preprocessor fitted on the whole training
# split, so each fold's validation rows already influenced the imputation and
# scaling statistics; the resulting scores may be slightly optimistic.
svm_scores = cross_val_score(
    estimator=svm_clf,
    X=X_train,
    y=y_train,
    cv=10,
)
svm_scores.mean()