Spaces:

boringnose
/

Decision_Tree_Visualizer

Runtime error

App Files Files Community

Decision_Tree_Visualizer / app.py

boringnose

Add Constant ccp_alpha

fbcb291 verified over 1 year ago

raw

history blame contribute delete

7.56 kB

	import pandas as pd
	import numpy as np
	import streamlit as st
	import time

	from sklearn import datasets
	from sklearn.model_selection import train_test_split
	from sklearn.tree import DecisionTreeClassifier
	from sklearn import tree
	from sklearn.metrics import accuracy_score
	import matplotlib.pyplot as plt

	st.set_page_config(
	page_title="Decision Tree Visualizer",
	page_icon=":chart_with_upwards_trend:",
	layout="wide",
	initial_sidebar_state="expanded")

	# load dataset
	iris=datasets.load_iris()
	x = iris.data
	y = iris.target
	x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=42)


	# constants
	min_weight_fraction_leaf=0.0
	max_features = None
	max_leaf_nodes = None
	min_impurity_decrease=0.0
	ccp_alpha = 0.0



	# Load initial graph
	fig, ax = plt.subplots()

	# Plot initial graph
	scatter = ax.scatter(x.T[0], x.T[1], c=y, cmap='rainbow')
	ax.set_xlabel(iris.feature_names[0], fontsize=10)
	ax.set_ylabel(iris.feature_names[1],fontsize=10)
	ax.set_title('Sepal Length vs Sepal Width', fontsize=15)
	legend1 = ax.legend(*scatter.legend_elements(),
	title="Classes",loc="upper right")
	ax.add_artist(legend1)
	ax.legend()
	orig = st.pyplot(fig)

	# sidebar elements
	st.sidebar.header(':blue[_Decision Tree_] Algo Visualizer', divider='rainbow')

	criterion = st.sidebar.selectbox("Criterion",
	("gini", "entropy", "log_loss"),
	help="""The function to measure the quality of a split.
	Supported criteria are “gini” for the Gini impurity and “log_loss” and “entropy”
	both for the Shannon information gain""")
	max_depth = st.sidebar.number_input("Max Depth",
	min_value=0,
	max_value=30,
	step=1,
	value=0,
	help="""The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure""")
	if max_depth == 0:
	max_depth=None
	min_samples_split = st.sidebar.number_input("Min Sample Split",
	min_value=0,
	max_value=x_train.shape[0],
	value=2,
	help="""The minimum number of samples required to split an internal node.
	If float, enter between 0 and 1""")
	min_samples_leaf = st.sidebar.number_input("Min sample Leaf",
	min_value=0,
	max_value=x_train.shape[0],
	value=1,
	help="""The minimum number of samples required to be at a leaf node.
	If float, enter between 0 and 1""")
	random_state = st.sidebar.number_input("Random State",
	min_value=0,
	value=42)

	# advance features
	toggle = st.sidebar.toggle("Advance Features")

	if toggle:
	min_weight_fraction_leaf = st.sidebar.number_input("Min Weight Fraction Leaf",
	min_value=0.0,
	max_value=1.0,
	value=0.0,
	help="""The minimum weighted fraction of the sum total of weights
	(of all the input samples) required to be at a leaf node. """)
	max_features = st.sidebar.selectbox("Max Features",
	(None,"sqrt", "log2","Custom"),
	help="""The number of features to consider when looking for the best split""")
	if max_features == "Custom":
	max_features = st.sidebar.number_input("Enter Max Features",
	value=None,
	step=1)

	max_leaf_nodes = st.sidebar.number_input("Max Leaf Nodes",
	min_value=0,
	help="""Grow a tree with max_leaf_nodes in best-first fashion. """)
	if max_leaf_nodes==0:
	max_leaf_nodes=None
	min_impurity_decrease = st.sidebar.number_input("Min Impurity Decrase",
	min_value=0.0,
	help="""A node will be split if this split induces a decrease of the
	impurity greater than or equal to this value.""")
	ccp_alpha = st.sidebar.number_input("ccp_alpha",
	min_value=0,
	max_value=30,
	step=0.1,
	value=0,
	help="""Complexity parameter used for Minimal Cost-Complexity Pruning.
	The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen.
	By default, no pruning is performed.""")
	train = st.sidebar.button("Train Model", type="primary")
	if st.sidebar.button("Reset"):
	st.experimental_rerun()
	if train:
	orig.empty()

	msg = st.toast('Running', icon='🫸🏼')
	# building model
	clf = DecisionTreeClassifier(criterion=criterion,max_depth=max_depth,
	min_samples_split=min_samples_split,
	min_samples_leaf=min_samples_leaf,
	min_weight_fraction_leaf=min_weight_fraction_leaf,
	max_features=max_features,
	random_state=random_state,
	max_leaf_nodes=max_leaf_nodes,
	min_impurity_decrease=min_impurity_decrease,
	ccp_alpha = ccp_alpha)
	clf.fit(x_train[:, :2], y_train)
	x_pred = clf.predict(x_train[:,:2])
	y_pred = clf.predict(x_test[:, :2])
	st.subheader("Train Accuracy " + str(round(accuracy_score(y_train, x_pred), 2)) + ", "+ "Test Accuracy " + str(round(accuracy_score(y_test, y_pred), 2)))
	st.write("Total Depth: " + str(clf.tree_.max_depth))


	# # define ranges for meshgrid
	x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
	y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1

	xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
	np.arange(y_min, y_max, 0.01))

	Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
	Z = Z.reshape(xx.shape)

	# Plot the decision boundaries
	plt.figure(figsize=(8, 6))
	plt.contourf(xx, yy, Z, alpha=0.8)
	plt.scatter(x[:, 0], x[:, 1], c=y, edgecolors='k', s=20)
	plt.xlabel('Sepal length')
	plt.ylabel('Sepal width')
	plt.title('Decision Boundaries')
	plt.tight_layout()
	plt.savefig('decision_boundary_plot.png')
	plt.close()

	# Display decision boundary plot
	st.image("decision_boundary_plot.png")

	# Plot decision tree
	plt.figure(figsize=(25, 20))
	tree.plot_tree(clf, feature_names=iris.feature_names, class_names=iris.target_names, filled=True)
	plt.xlim(plt.xlim()[0] * 2, plt.xlim()[1] * 2)
	plt.ylim(plt.ylim()[0] * 2, plt.ylim()[1] * 2)
	plt.savefig("decision_tree.png")
	plt.close()

	# Display decision tree plot
	st.image("decision_tree.png")

	msg.toast('Model run successfully!', icon='😎')