Spaces:

quangtn266
/

Drug-Classification

Sleeping

}

Update with new results

08461c2 over 1 year ago

2.07 kB

	import pandas as pd
	import skops.io as sio
	from sklearn.compose import ColumnTransformer
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.impute import SimpleImputer
	from sklearn.metrics import accuracy_score, f1_score
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OrdinalEncoder, StandardScaler

	# Load the drug dataset and shuffle its rows.
	# The shuffle is seeded with the same value this script passes to
	# train_test_split and RandomForestClassifier, so repeated runs see the
	# rows in the same order and produce comparable metrics.
	drug_df = pd.read_csv("./data/drug200.csv")
	drug_df = drug_df.sample(frac=1, random_state=125)

	# Separate the label from the features, then hold out 30% for testing.
	from sklearn.model_selection import train_test_split

	# "Drug" is the target; every remaining column is a feature. Both are
	# converted to plain arrays, so later steps address columns by position.
	y = drug_df["Drug"].values
	X = drug_df.drop(columns=["Drug"]).values

	# 70/30 split with a fixed random_state.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, random_state=125, test_size=0.3
	)

	# Pipeline
	# Positional column indices into the feature array: these go through the
	# ordinal encoder ...
	cat_col = [1, 2, 3]
	# ... and these go through median imputation followed by scaling.
	num_col = [0, 4]

	# ColumnTransformer applies each entry independently to the ORIGINAL
	# columns and concatenates the results. Imputation and scaling therefore
	# have to be chained in a single sub-pipeline; listing them as two separate
	# entries would emit the numeric columns twice (imputed-but-unscaled and
	# scaled-but-unimputed) instead of imputing and then scaling them.
	numeric_steps = Pipeline(
	    steps=[
	        ("num_imputer", SimpleImputer(strategy="median")),
	        ("num_scaler", StandardScaler()),
	    ]
	)

	transform = ColumnTransformer(
	    [
	        ("encoder", OrdinalEncoder(), cat_col),
	        ("numeric", numeric_steps, num_col),
	    ]
	)

	# Full model: preprocessing followed by a small, seeded random forest.
	pipe = Pipeline(
	    steps=[
	        ("preprocessing", transform),
	        ("model", RandomForestClassifier(n_estimators=10, random_state=125)),
	    ]
	)

	# train
	pipe.fit(X_train, y_train)

	# Model Evaluation
	# Score the held-out set: plain accuracy plus macro-averaged F1.
	predictions = pipe.predict(X_test)
	accuracy = accuracy_score(y_test, predictions)
	f1 = f1_score(y_test, predictions, average="macro")

	# Use the "%" format spec for the percentage. Multiplying a rounded float
	# by 100 exposes binary floating-point noise (round(0.57, 2) * 100 is
	# 56.99999999999999), which would end up in the printed report.
	print("Accuracy: ", f"{accuracy:.0%}", "F1: ", round(f1, 2))

	# Confusion matrix
	import matplotlib.pyplot as plt
	from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

	# Predict on the held-out set and tabulate predictions against the true
	# labels, ordering rows and columns by the fitted pipeline's classes.
	predictions = pipe.predict(X_test)
	cm = confusion_matrix(y_test, predictions, labels=pipe.classes_)

	# plot() hands back the display object, so construct and draw in one go,
	# then save the current figure to disk.
	disp = ConfusionMatrixDisplay(
	    confusion_matrix=cm, display_labels=pipe.classes_
	).plot()
	plt.savefig("./results/model_result.png", dpi=120)

	# Write metrics to files
	# Build the report line first (both scores rounded to two decimals), then
	# overwrite the metrics file with it.
	metrics_line = f"\nAccuracy={round(accuracy, 2)}, F1_score = {round(f1, 2)}"
	with open("./results/metrics.txt", "w") as outfile:
	    outfile.write(metrics_line)

	# Save the model
	import pickle

	# Persist the fitted pipeline to disk. The context manager guarantees the
	# file is flushed and closed even if pickling fails part-way through; the
	# previous bare open() left the handle for the garbage collector to close.
	filename = "./model/drug_pipeline.sav"
	with open(filename, "wb") as model_file:
	    pickle.dump(pipe, model_file)

	# NOTE: unpickling can execute arbitrary code, so this artifact must only
	# be loaded from a trusted source. The skops-based alternative is:
	# sio.dump(pipe, "./model/drug_pipeline.skops")