# gradio_first / app.py
# Uploaded by 99user99 ("Upload app.py", commit 73f1f31, verified)
import io
from collections import Counter

import gradio as gd
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
class ClassID3Decisiontree:
    """ID3 decision-tree classifier for categorical features.

    The learned tree is stored as nested dicts of the form
    ``{feature_name: {feature_value: subtree_or_label}}``; a non-dict node
    is a leaf holding the predicted label.
    """

    def __init__(self):
        # Learned tree; empty until fit() is called.
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from a DataFrame ``X`` of categorical features and labels ``y``."""
        # Remember the feature names, then recursively grow the tree.
        self.features = list(X.columns)
        self.tree = self.id3(X, y, self.features)

    def id3(self, X, y, features):
        """Recursively construct an ID3 subtree for the samples in ``(X, y)``."""
        # All samples share one label: return it as a leaf.
        if len(set(y)) == 1:
            return y.iloc[0]
        # No features left to split on: majority-vote leaf.
        if len(features) == 0:
            return Counter(y).most_common(1)[0][0]
        best_feature = self.choose_best_features(X, y, features)
        # No feature yields positive information gain: majority-vote leaf.
        # (The original built a {None: {}} node here and crashed on X[None].)
        if best_feature is None:
            return Counter(y).most_common(1)[0][0]
        tree = {best_feature: {}}
        # Drop the chosen feature from further consideration.
        remaining = [f for f in features if f != best_feature]
        # Branch on every observed value of the chosen feature.
        for value in X[best_feature].unique():
            mask = X[best_feature] == value
            sub_X = X[mask].drop([best_feature], axis=1)
            sub_y = y[mask]
            tree[best_feature][value] = self.id3(sub_X, sub_y, remaining)
        return tree

    def choose_best_features(self, X, y, features):
        """Return the feature with the highest information gain, or None if no gain > 0."""
        best_gain = 0
        best_feature = None
        all_entropy = self.calc_all_entropy(y)
        for feature in features:
            # Information gain = H(y) - H(y | feature).
            info_gain = all_entropy - self.calc_conditions_entropy(X, y, feature)
            if info_gain > best_gain:
                best_gain = info_gain
                best_feature = feature
        return best_feature

    def calc_all_entropy(self, y):
        """Shannon entropy of the label series ``y``.

        Generalized from the original hard-coded ``['否', '是']`` counts so the
        classifier works with arbitrary class labels (entropy is invariant to
        the label names, so results are identical on the 是/否 dataset).
        """
        counts = np.array(list(Counter(y).values()))
        probabilities = counts / len(y)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

    def calc_conditions_entropy(self, X, y, feature):
        """Conditional entropy H(y | feature): entropy of each value split, weighted by its frequency."""
        cond_entropy = 0
        for value in X[feature].unique():
            sub_y = y[X[feature] == value]
            prob = len(sub_y) / len(y)
            cond_entropy += prob * self.calc_all_entropy(sub_y)
        return cond_entropy

    def predict(self, X):
        """Predict a label for every row of DataFrame ``X``.

        Rows whose feature value was never seen during training map to None.
        """
        return np.array([self._predict(self.tree, sample) for _, sample in X.iterrows()])

    def _predict(self, tree, sample):
        # Leaf reached: the node itself is the label.
        if not isinstance(tree, dict):
            return tree
        root = next(iter(tree))
        feature_value = sample[root]
        # Follow the branch matching this sample's value; unseen value -> None.
        if feature_value in tree[root]:
            return self._predict(tree[root][feature_value], sample)
        return None
def run(SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan):
    """Gradio callback: train on the watermelon dataset and classify one sample.

    Each argument is the user's selected value for one feature (color, root,
    knock sound, texture, navel, touch). Returns a human-readable verdict,
    or a fallback message when the selection cannot be classified.
    """
    # Load the dataset and normalize comma separators to spaces *in memory*,
    # instead of destructively rewriting ./1.txt on every call as before.
    with open('./1.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    text = pd.read_table(io.StringIO(data.replace(',', ' ')), sep=' ')
    X = text.drop(['编号', '好瓜'], axis=1)
    y = text['好瓜']
    # Train on the full dataset (it is tiny; no train/test split needed).
    model = ClassID3Decisiontree()
    model.fit(X, y)
    # One-row frame from the user's selections; column names must match the
    # training features. (Renamed the local so the builtin `dict` isn't shadowed.)
    user_input = pd.DataFrame({
        "色泽": [SeZe],
        "根蒂": [GenDi],
        "敲声": [QiaoSheng],
        "纹理": [WengLi],
        "脐部": [QiBu],
        "触感": [ChuGan],
    })
    # Take the scalar prediction for the single row; predict() yields None when
    # a feature value was never seen during training. Explicit `is not None`
    # avoids deprecated ndarray truthiness and prints the label, not the array.
    prediction = model.predict(user_input)[0]
    return f'你的瓜是好瓜? {prediction}' if prediction is not None else "条件不足以判断好瓜坏瓜"
# Allowed choices for each watermelon attribute (must match the dataset values).
SeZe = ["青绿", "乌黑", "浅白"]
GenDi = ["硬挺", "稍蜷", "蜷缩"]
QiaoSheng = ["浊响", "沉闷", "清脆"]
WengLi = ["清晰", "稍糊", "模糊"]
QiBu = ["凹陷", "平坦", "稍凹"]
ChuGan = ["硬滑", "软粘"]

# One radio-button group per attribute, in the order `run` expects its arguments.
_choice_groups = [SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan]
gd_watermelon = gd.Interface(
    fn=run,
    inputs=[gd.Radio(choices) for choices in _choice_groups],
    outputs='text',
    live=True,
)
gd_watermelon.launch()