Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import pandas as pd | |
| from collections import Counter | |
| from sklearn.model_selection import train_test_split | |
| import gradio as gd | |
class ClassID3Decisiontree:
    """ID3 decision tree for categorical features.

    The learned tree is a nested dict of the form
    ``{feature_name: {feature_value: subtree_or_label}}``; a leaf is a
    plain label value. Training data is a pandas DataFrame of
    categorical columns plus a label Series.
    """

    def __init__(self):
        # Learned model; empty until fit() is called.
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from DataFrame ``X`` and label Series ``y``."""
        # Remember the feature names seen at training time.
        self.features = list(X.columns)
        # Recursively grow the tree over all features.
        self.tree = self.id3(X, y, self.features)

    def id3(self, X, y, features):
        """Recursively construct an ID3 subtree for the samples in X/y."""
        # All samples share one label -> leaf node with that label.
        if len(set(y)) == 1:
            return y.iloc[0]
        # No features left to split on -> leaf with the majority label.
        if len(features) == 0:
            return Counter(y).most_common(1)[0][0]
        # Pick the feature with the highest information gain.
        best_features = self.choose_best_features(X, y, features)
        # No feature yields positive gain: stop with the majority label
        # (the original code would crash here indexing X[None]).
        if best_features is None:
            return Counter(y).most_common(1)[0][0]
        tree = {best_features: {}}
        # Remove the chosen feature before recursing.
        features = [f for f in features if f != best_features]
        # One branch per observed value of the chosen feature.
        for value in X[best_features].unique():
            mask = X[best_features] == value
            sub_X = X[mask].drop([best_features], axis=1)
            sub_y = y[mask]
            tree[best_features][value] = self.id3(sub_X, sub_y, features)
        return tree

    def choose_best_features(self, X, y, features):
        """Return the feature with the largest information gain.

        Returns None when no feature has strictly positive gain.
        """
        best_gain = 0
        best_features = None
        # Entropy of the full label distribution, H(y).
        all_entropy = self.calc_all_entropy(y)
        for feature in features:
            # Information gain = H(y) - H(y | feature).
            info_gain = all_entropy - self.calc_conditions_entropy(X, y, feature)
            if info_gain > best_gain:
                best_gain = info_gain
                best_features = feature
        return best_features

    def calc_all_entropy(self, y):
        """Shannon entropy (base 2) of the labels in ``y``.

        Works for any label set, not just the hard-coded '是'/'否'
        pair of the original implementation (entropy only depends on
        the counts, so results are unchanged for that pair).
        """
        counts = np.array(list(Counter(y).values()))
        probabilities = counts / len(y)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

    def calc_conditions_entropy(self, X, y, feature):
        """Conditional entropy H(y | feature): count-weighted entropy per value."""
        single_entrop = 0
        for value in X[feature].unique():
            sub_y = y[X[feature] == value]
            prob = len(sub_y) / len(y)
            single_entrop += prob * self.calc_all_entropy(sub_y)
        return single_entrop

    def predict(self, X):
        """Predict a label for every row of DataFrame ``X``.

        Rows whose feature values fall outside the tree yield None.
        """
        results = np.array([self._predict(self.tree, sample)
                            for index, sample in X.iterrows()])
        return results

    def _predict(self, tree, sample):
        """Walk the tree for one sample (a pandas row)."""
        # Reached a leaf: return its label.
        if not isinstance(tree, dict):
            return tree
        root = next(iter(tree))
        feature_value = sample[root]
        # Follow the matching branch; unseen values cannot be classified.
        if feature_value in tree[root]:
            return self._predict(tree[root][feature_value], sample)
        else:
            return None
def run(SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan):
    """Train on the watermelon dataset and classify one user-described melon.

    Each argument is the value of one categorical feature, chosen from the
    radio buttons in the Gradio UI. Returns a human-readable verdict string.
    """
    from io import StringIO

    # Load the dataset and normalize comma separators to spaces IN MEMORY.
    # The original code rewrote ./1.txt on every call, a destructive
    # side effect that is unnecessary for parsing.
    with open('./1.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    text = pd.read_table(StringIO(data.replace(',', ' ')), sep=' ')
    # Drop the id column and the label column to get the feature matrix.
    X = text.drop(['编号', '好瓜'], axis=1)
    y = text['好瓜']
    # Train the ID3 tree on the full dataset.
    model = ClassID3Decisiontree()
    model.fit(X, y)
    # Assemble the single user sample with the same column names as X.
    # (renamed from `dict`, which shadowed the builtin)
    features = {
        "色泽": [SeZe],
        "根蒂": [GenDi],
        "敲声": [QiaoSheng],
        "纹理": [WengLi],
        "脐部": [QiBu],
        "触感": [ChuGan],
    }
    user_input = pd.DataFrame(features)
    # predict() returns one label per row; we passed exactly one row.
    pred = model.predict(user_input)[0]
    # None means the tree has no branch for this feature combination.
    return f'你的瓜是好瓜? {pred}' if pred is not None else "条件不足以判断好瓜坏瓜"
# Choice lists for each categorical feature shown in the UI.
SeZe = ["青绿", "乌黑", "浅白"]
GenDi = ["硬挺", "稍蜷", "蜷缩"]
QiaoSheng = ["浊响", "沉闷", "清脆"]
WengLi = ["清晰", "稍糊", "模糊"]
QiBu = ["凹陷", "平坦", "稍凹"]
ChuGan = ["硬滑", "软粘"]
# One radio group per feature, in the order run() expects its arguments.
feature_inputs = [
    gd.Radio(choices)
    for choices in (SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan)
]
gd_watermelon = gd.Interface(
    fn=run,
    inputs=feature_inputs,
    outputs='text',
    live=True,
)
gd_watermelon.launch()