import io
from collections import Counter

import numpy as np
import pandas as pd
import gradio as gd
from sklearn.model_selection import train_test_split


class ClassID3Decisiontree:
    """ID3 decision-tree classifier for categorical features.

    The fitted tree is a nested dict of the form
    ``{feature_name: {feature_value: subtree_or_leaf, ...}}``;
    a leaf is a raw class label.
    """

    def __init__(self):
        # Empty until fit() is called.
        self.tree = {}

    def fit(self, X, y):
        """Build the tree from DataFrame ``X`` and label Series ``y``.

        Args:
            X: pandas DataFrame of categorical feature columns.
            y: pandas Series of class labels aligned with ``X``.
        """
        self.features = list(X.columns)
        self.tree = self.id3(X, y, self.features)

    def id3(self, X, y, features):
        """Recursively construct an ID3 subtree; returns a dict or a leaf label."""
        # All remaining samples share one label -> pure leaf.
        if len(set(y)) == 1:
            return y.iloc[0]
        # No features left to split on -> majority-class leaf.
        if len(features) == 0:
            return Counter(y).most_common(1)[0][0]
        best_feature = self.choose_best_features(X, y, features)
        # FIX: when no feature yields positive information gain,
        # choose_best_features returns None; the original code then built
        # a bogus {None: {...}} subtree. Return the majority class instead.
        if best_feature is None:
            return Counter(y).most_common(1)[0][0]
        tree = {best_feature: {}}
        remaining = [f for f in features if f != best_feature]
        # Branch on every observed value of the chosen feature.
        for value in X[best_feature].unique():
            mask = X[best_feature] == value
            sub_X = X[mask].drop([best_feature], axis=1)
            sub_y = y[mask]
            tree[best_feature][value] = self.id3(sub_X, sub_y, remaining)
        return tree

    def choose_best_features(self, X, y, features):
        """Return the feature with the largest information gain.

        Returns None when no feature achieves a strictly positive gain.
        """
        best_gain = 0
        best_feature = None
        all_entropy = self.calc_all_entropy(y)
        for feature in features:
            info_gain = all_entropy - self.calc_conditions_entropy(X, y, feature)
            if info_gain > best_gain:
                best_gain = info_gain
                best_feature = feature
        return best_feature

    def calc_all_entropy(self, y):
        """Shannon entropy (base 2) of the label series ``y``.

        FIX: the original hard-coded the two labels '否'/'是', producing
        wrong entropies for any other label set; counting every observed
        label generalizes it while giving identical results on the
        original two-label data.
        """
        counts = np.array(list(Counter(y).values()))
        probabilities = counts / len(y)
        # Skip zero probabilities: lim p->0 of p*log2(p) is 0.
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])

    def calc_conditions_entropy(self, X, y, feature):
        """Conditional entropy H(y | feature): weighted entropy of each branch."""
        cond_entropy = 0
        for value in X[feature].unique():
            sub_y = y[X[feature] == value]
            weight = len(sub_y) / len(y)
            cond_entropy += weight * self.calc_all_entropy(sub_y)
        return cond_entropy

    def predict(self, X):
        """Predict one label per row of DataFrame ``X``; unknown paths give None."""
        return np.array(
            [self._predict(self.tree, sample) for _, sample in X.iterrows()]
        )

    def _predict(self, tree, sample):
        """Walk the tree for one sample (a row Series); None if a value is unseen."""
        # Reached a leaf label.
        if not isinstance(tree, dict):
            return tree
        root = next(iter(tree))
        feature_value = sample[root]
        # Descend only if this feature value was seen during training.
        if feature_value in tree[root]:
            return self._predict(tree[root][feature_value], sample)
        return None


def run(SeZe, GenDi, QiaoSheng, WengLi, QiBu, ChuGan):
    """Gradio callback: train on ./1.txt and classify one user-described melon.

    Args are the six categorical feature values chosen in the UI
    (may be None while the live form is incomplete).
    Returns a result string.
    """
    # FIX: normalize the separator in memory instead of destructively
    # rewriting ./1.txt on every invocation as the original did.
    with open('./1.txt', mode='r', encoding='utf-8') as f:
        data = f.read().replace(',', ' ')
    text = pd.read_table(io.StringIO(data), sep=' ')
    X = text.drop(['编号', '好瓜'], axis=1)
    y = text['好瓜']
    # Train a fresh model on the full dataset (no held-out split here).
    model = ClassID3Decisiontree()
    model.fit(X, y)
    # FIX: renamed from `dict`, which shadowed the builtin.
    sample_dict = {
        "色泽": [SeZe],
        "根蒂": [GenDi],
        "敲声": [QiaoSheng],
        "纹理": [WengLi],
        "脐部": [QiBu],
        "触感": [ChuGan],
    }
    user_input = pd.DataFrame(sample_dict)
    predictions = model.predict(user_input)
    # FIX: explicit element check instead of truth-testing a NumPy array.
    if predictions[0] is not None:
        return f'你的瓜是好瓜? {predictions}'
    return "条件不足以判断好瓜坏瓜"


# Option lists for the six Gradio radio inputs.
SeZe = ["青绿", "乌黑", "浅白"]
GenDi = ["硬挺", "稍蜷", "蜷缩"]
QiaoSheng = ["浊响", "沉闷", "清脆"]
WengLi = ["清晰", "稍糊", "模糊"]
QiBu = ["凹陷", "平坦", "稍凹"]
ChuGan = ["硬滑", "软粘"]

# FIX: guard the UI launch so importing this module no longer starts a server.
if __name__ == "__main__":
    gd_watermelon = gd.Interface(
        fn=run,
        inputs=[
            gd.Radio(SeZe),
            gd.Radio(GenDi),
            gd.Radio(QiaoSheng),
            gd.Radio(WengLi),
            gd.Radio(QiBu),
            gd.Radio(ChuGan),
        ],
        outputs='text',
        live=True,
    )
    gd_watermelon.launch()