# 本文用到的库 import numpy as np import pandas as pd from sklearn.tree import DecisionTreeClassifier import base64 import streamlit as st from sklearn import preprocessing from dtreeviz.trees import * from data import getDataSetOrigin,dataPreprocessing import joblib from DecisionTree import dt_param_selector import numpy as np import matplotlib.pyplot as plt from data import dataPreprocessing from sklearn.tree import DecisionTreeClassifier import streamlit as st def decisionTreeViz(clf): df = dataPreprocessing() X, y = df[df.columns[:-1]], df["label"] viz = dtreeviz( clf, X, y, orientation="LR", target_name="label", feature_names=df.columns[:-1], class_names=["good", "bad"], # need class_names for classifier ) return viz def svg_write(svg, center=True): """ Disable center to left-margin align like other objects. """ # Encode as base 64 b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8") # Add some CSS on top css_justify = "center" if center else "left" css = ( f'

' ) html = f'{css}' # Write the HTML st.write(html, unsafe_allow_html=True, width=800, caption="决策树") def plotSurface(): st.set_option('deprecation.showPyplotGlobalUse', False) # Parameters n_classes = 2 plot_colors = "ryb" plot_step = 0.02 # Load data df = dataPreprocessing() plt.figure(figsize=(8,4)) for pairidx, pair in enumerate([[1, 0], [1, 3], [1, 4], [1, 5], [3, 0], [3, 2], [3, 4], [3, 5]]): # We only take the two corresponding features X, y = df[df.columns[:-1]].values[:, pair], df["label"] # Train clf = DecisionTreeClassifier().fit(X, y) # Plot the decision boundary fig=plt.subplot(2, 4, pairidx + 1) x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid( np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step) ) plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5) Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu) plt.xlabel(df.columns[pair[0]]) plt.ylabel(df.columns[pair[1]]) # Plot the training points for i, color in zip(range(n_classes), plot_colors): idx = np.where(y == i) plt.scatter( X[idx, 0], X[idx, 1], c=color, label=df["label"][i], cmap=plt.cm.RdYlBu, edgecolor="black", s=15, ) plt.suptitle("Decision surface of a decision tree using paired features") plt.legend(loc="lower right", borderpad=0, handletextpad=0) plt.axis("tight") # plt.show() plt.tight_layout() st.pyplot()