Spaces:
Runtime error
Runtime error
File size: 4,683 Bytes
ff180a7 73f1f31 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
import gradio as gd
class ClassID3Decisiontree:
    """ID3 decision tree for categorical features.

    The fitted tree is stored in ``self.tree`` as nested dicts of the form
    ``{feature: {value: subtree_or_label}}``; a non-dict node is a leaf that
    holds the predicted label.
    """

    def __init__(self):
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from a feature DataFrame ``X`` and label Series ``y``."""
        # Remember the feature names used for training.
        self.features = list(X.columns)
        # Recursively build the decision tree.
        self.tree = self.id3(X, y, self.features)

    def id3(self, X, y, features):
        """Recursively build and return the (sub)tree for the given samples.

        Returns a label for a leaf, a nested dict for an internal node, or
        ``None`` when there are no samples to learn from.
        """
        # No samples reached this node: nothing can be decided here.
        if len(y) == 0:
            return None
        # All samples share one class: make a leaf.
        if len(set(y)) == 1:
            return y.iloc[0]
        # No feature left to split on: fall back to the majority class.
        if len(features) == 0:
            return Counter(y).most_common(1)[0][0]
        # Pick the feature with the highest information gain.
        best_features = self.choose_best_features(X, y, features)
        tree = {best_features: {}}
        # The chosen feature is not available to the subtrees.
        remaining = [i for i in features if i != best_features]
        # Build one branch per observed value of the chosen feature.
        for value in X[best_features].unique():
            mask = X[best_features] == value
            sub_X = X[mask].drop([best_features], axis=1)
            sub_y = y[mask]
            tree[best_features][value] = self.id3(sub_X, sub_y, remaining)
        return tree

    def choose_best_features(self, X, y, features):
        """Return the feature with the largest information gain.

        Ties go to the earliest feature in ``features``. Returns ``None`` only
        when ``features`` is empty.
        """
        # Start below any achievable gain so that a feature is still chosen
        # when every gain is zero (or slightly negative from rounding);
        # otherwise the caller would receive None and fail to split.
        best_gain = float('-inf')
        best_features = None
        # Entropy of the labels before splitting.
        all_entropy = self.calc_all_entropy(y)
        for feature in features:
            # Information gain of splitting on this feature.
            info_gain = all_entropy - self.calc_conditions_entropy(X, y, feature)
            if info_gain > best_gain:
                best_gain = info_gain
                best_features = feature
        return best_features

    def calc_all_entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels in ``y``.

        Works for any set of label values; an empty ``y`` has entropy 0.
        """
        total = len(y)
        if total == 0:
            return 0.0
        # Counter only holds labels that actually occur, so every probability
        # is strictly positive and log2 is always defined.
        counts = np.array(list(Counter(y).values()), dtype=float)
        probabilities = counts / total
        return -np.sum(probabilities * np.log2(probabilities))

    def calc_conditions_entropy(self, X, y, feature):
        """Return the conditional entropy of ``y`` given ``feature``.

        This is the term subtracted from the label entropy to obtain the
        information gain.
        """
        single_entrop = 0
        for value in X[feature].unique():
            sub_y = y[X[feature] == value]
            prob = len(sub_y) / len(y)
            single_entrop += prob * self.calc_all_entropy(sub_y)
        return single_entrop

    def predict(self, X):
        """Predict a label for every row of ``X``; returns a NumPy array.

        An entry is ``None`` when the row cannot be classified.
        """
        # Walk the tree once per row.
        results = np.array([self._predict(self.tree, sample) for _, sample in X.iterrows()])
        return results

    def _predict(self, tree, sample):
        """Follow ``sample`` down ``tree`` and return the leaf label or ``None``."""
        # Reached a leaf.
        if not isinstance(tree, dict):
            return tree
        root = next(iter(tree))
        branches = tree[root]
        feature_value = sample[root]
        # Descend when the value was seen during training; otherwise the
        # prediction fails and None is returned.
        if feature_value in branches:
            return self._predict(branches[feature_value], sample)
        return None
def run(SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan):
    """Train the ID3 tree on ``./1.txt`` and classify one sample.

    The six arguments are the feature values of the sample, mapped to the
    columns "色泽", "根蒂", "敲声", "纹理", "脐部" and "触感" respectively.
    Returns a message containing the prediction, or a fallback message when
    the tree cannot classify the sample.
    """
    import io  # local import: only needed for the in-memory buffer below

    # Load the data set. Commas are normalised to spaces in memory rather
    # than by rewriting the file, so the data file is never modified and
    # overlapping calls cannot observe a half-written file.
    with open('./1.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    text = pd.read_table(io.StringIO(data.replace(',', ' ')), sep=' ')
    X = text.drop(['编号', '好瓜'], axis=1)
    y = text['好瓜']

    # Train on the full data set (no train/test split).
    model = ClassID3Decisiontree()
    model.fit(X, y)

    # Wrap the user's choices in a single-row DataFrame.
    sample = {
        "色泽": [SeZe],
        "根蒂": [GenDi],
        "敲声": [QiaoSheng],
        "纹理": [WengLi],
        "脐部": [QiBu],
        "触感": [ChuGan],
    }
    user_input = pd.DataFrame(sample)
    predictions = model.predict(user_input)

    # The model yields None when the sample cannot be classified.
    if predictions[0] is None:
        return "条件不足以判断好瓜坏瓜"
    return f'你的瓜是好瓜? {predictions}'
# Option lists for the six radio inputs, in the order of run()'s parameters.
SeZe = ["青绿", "乌黑", "浅白"]
GenDi = ["硬挺", "稍蜷", "蜷缩"]
QiaoSheng = ["浊响", "沉闷", "清脆"]
WengLi = ["清晰", "稍糊", "模糊"]
QiBu = ["凹陷", "平坦", "稍凹"]
ChuGan = ["硬滑", "软粘"]

# One radio group per feature; the text output shows run()'s return value.
gd_watermelon = gd.Interface(
    fn=run,
    inputs=[
        gd.Radio(SeZe),
        gd.Radio(GenDi),
        gd.Radio(QiaoSheng),
        gd.Radio(WengLi),
        gd.Radio(QiBu),
        gd.Radio(ChuGan),
    ],
    outputs='text',
    live=True,
)
gd_watermelon.launch()