# gradio_first / app.py
# Uploaded by 99user99 ("Upload app.py", commit 73f1f31, verified)
import io
from collections import Counter

import gradio as gd
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
class ClassID3Decisiontree:
    """ID3 decision-tree classifier for categorical features.

    The learned tree is stored as nested dicts of the form
    ``{feature_name: {feature_value: subtree_or_label}}``; a non-dict node
    is a leaf holding the predicted label.
    """

    def __init__(self):
        # Learned tree; empty until fit() is called.
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from a DataFrame ``X`` of categorical features and labels ``y``."""
        # Remember the feature names, then recursively grow the tree.
        self.features = list(X.columns)
        self.tree = self.id3(X, y, self.features)

    def id3(self, X, y, features):
        """Recursively construct an ID3 subtree for the samples in ``(X, y)``."""
        # All samples share one label: return it as a leaf.
        if len(set(y)) == 1:
            return y.iloc[0]
        # No features left to split on: majority-vote leaf.
        if len(features) == 0:
            return Counter(y).most_common(1)[0][0]
        best_feature = self.choose_best_features(X, y, features)
        # No feature yields positive information gain: majority-vote leaf.
        # (The original built a {None: {}} node here and crashed on X[None].)
        if best_feature is None:
            return Counter(y).most_common(1)[0][0]
        tree = {best_feature: {}}
        # Drop the chosen feature from further consideration.
        remaining = [f for f in features if f != best_feature]
        # Branch on every observed value of the chosen feature.
        for value in X[best_feature].unique():
            mask = X[best_feature] == value
            sub_X = X[mask].drop([best_feature], axis=1)
            sub_y = y[mask]
            tree[best_feature][value] = self.id3(sub_X, sub_y, remaining)
        return tree

    def choose_best_features(self, X, y, features):
        """Return the feature with the highest information gain, or None if no gain > 0."""
        best_gain = 0
        best_feature = None
        all_entropy = self.calc_all_entropy(y)
        for feature in features:
            # Information gain = H(y) - H(y | feature).
            info_gain = all_entropy - self.calc_conditions_entropy(X, y, feature)
            if info_gain > best_gain:
                best_gain = info_gain
                best_feature = feature
        return best_feature

    def calc_all_entropy(self, y):
        """Shannon entropy of the label series ``y``.

        Generalized from the original hard-coded ``['否', '是']`` counts so the
        classifier works with arbitrary class labels (entropy is invariant to
        the label names, so results are identical on the 是/否 dataset).
        """
        counts = np.array(list(Counter(y).values()))
        probabilities = counts / len(y)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

    def calc_conditions_entropy(self, X, y, feature):
        """Conditional entropy H(y | feature): entropy of each value split, weighted by its frequency."""
        cond_entropy = 0
        for value in X[feature].unique():
            sub_y = y[X[feature] == value]
            prob = len(sub_y) / len(y)
            cond_entropy += prob * self.calc_all_entropy(sub_y)
        return cond_entropy

    def predict(self, X):
        """Predict a label for every row of DataFrame ``X``.

        Rows whose feature value was never seen during training map to None.
        """
        return np.array([self._predict(self.tree, sample) for _, sample in X.iterrows()])

    def _predict(self, tree, sample):
        # Leaf reached: the node itself is the label.
        if not isinstance(tree, dict):
            return tree
        root = next(iter(tree))
        feature_value = sample[root]
        # Follow the branch matching this sample's value; unseen value -> None.
        if feature_value in tree[root]:
            return self._predict(tree[root][feature_value], sample)
        return None
def run(SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan):
    """Gradio callback: train on the watermelon dataset and classify one sample.

    Each argument is the user's selected value for one feature (color, root,
    knock sound, texture, navel, touch). Returns a human-readable verdict,
    or a fallback message when the selection cannot be classified.
    """
    # Load the dataset and normalize comma separators to spaces *in memory*,
    # instead of destructively rewriting ./1.txt on every call as before.
    with open('./1.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    text = pd.read_table(io.StringIO(data.replace(',', ' ')), sep=' ')
    X = text.drop(['编号', '好瓜'], axis=1)
    y = text['好瓜']
    # Train on the full dataset (it is tiny; no train/test split needed).
    model = ClassID3Decisiontree()
    model.fit(X, y)
    # One-row frame from the user's selections; column names must match the
    # training features. (Renamed the local so the builtin `dict` isn't shadowed.)
    user_input = pd.DataFrame({
        "色泽": [SeZe],
        "根蒂": [GenDi],
        "敲声": [QiaoSheng],
        "纹理": [WengLi],
        "脐部": [QiBu],
        "触感": [ChuGan],
    })
    # Take the scalar prediction for the single row; predict() yields None when
    # a feature value was never seen during training. Explicit `is not None`
    # avoids deprecated ndarray truthiness and prints the label, not the array.
    prediction = model.predict(user_input)[0]
    return f'你的瓜是好瓜? {prediction}' if prediction is not None else "条件不足以判断好瓜坏瓜"
# Allowed choices for each watermelon attribute (must match the dataset values).
SeZe = ["青绿", "乌黑", "浅白"]
GenDi = ["硬挺", "稍蜷", "蜷缩"]
QiaoSheng = ["浊响", "沉闷", "清脆"]
WengLi = ["清晰", "稍糊", "模糊"]
QiBu = ["凹陷", "平坦", "稍凹"]
ChuGan = ["硬滑", "软粘"]

# One radio-button group per attribute, in the order `run` expects its arguments.
_choice_groups = [SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan]
gd_watermelon = gd.Interface(
    fn=run,
    inputs=[gd.Radio(choices) for choices in _choice_groups],
    outputs='text',
    live=True,
)
gd_watermelon.launch()