Spaces:
Sleeping
Sleeping
"""Train, evaluate and persist the drug-classification pipeline.

Reads ``./data/drug200.csv``, fits a preprocessing + random-forest pipeline
that predicts the ``Drug`` column, and writes three artefacts:

* ``./results/model_result.png`` - confusion-matrix plot for the test split
* ``./results/metrics.txt``      - rounded accuracy and macro F1
* ``./model/drug_pipeline.sav``  - the fitted pipeline, pickled
"""

import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import skops.io as sio  # noqa: F401  (only used by the optional skops dump at the bottom)
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    confusion_matrix,
    f1_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

# One seed for the shuffle, the split and the forest so a run is repeatable.
RANDOM_STATE = 125

# Loading data.
drug_df = pd.read_csv("./data/drug200.csv")
# Shuffle the rows; seeded, otherwise the fixed seed on the split below
# would not make the train/test partition reproducible.
drug_df = drug_df.sample(frac=1, random_state=RANDOM_STATE)

# train/ test split
# `.values` turns the frame into a plain array, so columns are addressed by
# position from here on.
X = drug_df.drop("Drug", axis=1).values
y = drug_df.Drug.values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Pipeline
# Positional indices into X (the frame with "Drug" removed).
# NOTE(review): which feature sits at which index depends on the CSV's column
# order - confirm against the data file.
cat_col = [1, 2, 3]
num_col = [0, 4]

# ColumnTransformer runs its entries side by side on the raw input and
# concatenates their outputs. Imputation and scaling therefore have to be
# chained inside one entry; listing them as two entries would emit each
# numeric column twice (imputed-but-unscaled and scaled-but-unimputed).
numeric_steps = Pipeline(
    steps=[
        ("num_imputer", SimpleImputer(strategy="median")),
        ("num_scaler", StandardScaler()),
    ]
)
transform = ColumnTransformer(
    [
        ("encoder", OrdinalEncoder(), cat_col),
        ("numeric", numeric_steps, num_col),
    ]
)
pipe = Pipeline(
    steps=[
        ("preprocessing", transform),
        (
            "model",
            RandomForestClassifier(n_estimators=10, random_state=RANDOM_STATE),
        ),
    ]
)

# train
pipe.fit(X_train, y_train)

# Model Evaluation
predictions = pipe.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
f1 = f1_score(y_test, predictions, average="macro")
# Let the format spec do the x100: multiplying the rounded float by hand can
# print artefacts such as 56.99999999999999%.
print("Accuracy: ", f"{round(accuracy, 2):.1%}", "F1: ", round(f1, 2))

# The output folders may be absent on a fresh checkout.
Path("./results").mkdir(parents=True, exist_ok=True)
Path("./model").mkdir(parents=True, exist_ok=True)

# Confusion matrix (reuses the predictions computed above)
cm = confusion_matrix(y_test, predictions, labels=pipe.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=pipe.classes_)
disp.plot()
plt.savefig("./results/model_result.png", dpi=120)

# Write metrics to files
with open("./results/metrics.txt", "w", encoding="utf-8") as outfile:
    outfile.write(f"\nAccuracy={round(accuracy, 2)}, F1_score = {round(f1, 2)}")

# Save the model
# save the model to disk; the context manager guarantees the handle is
# flushed and closed even if pickling fails part-way.
filename = "./model/drug_pipeline.sav"
with open(filename, "wb") as model_file:
    pickle.dump(pipe, model_file)
# Alternative serialisation via skops (currently disabled):
# sio.dump(pipe, "./model/drug_pipeline.skops")