import pandas as pd
import seaborn as sns
import streamlit as st
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

def process_and_evaluate(file):
    """Train a random-forest classifier on a CSV dataset and evaluate it.

    The CSV is loaded with pandas, every object-dtype column is
    label-encoded, and the data is split 80/20 (``random_state=42``) into
    training and test sets. A ``RandomForestClassifier`` is fitted on the
    training part and scored on the held-out part.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path, buffer, uploaded
            file object). The data must contain a column named ``target``
            holding exactly two distinct classes.

    Returns:
        A 6-tuple ``(df, conf_matrix, classification_rep, fpr, tpr, roc_auc)``:
        the label-encoded DataFrame, the test-set confusion matrix, the
        text classification report, the ROC curve's false/true positive
        rates, and the area under that curve.

    Raises:
        KeyError: If the ``target`` column is missing.
        ValueError: If ``target`` does not contain exactly two classes
            (the ROC/AUC computed here is only defined for binary labels).
    """
    # Load the dataset.
    df = pd.read_csv(file)

    # The label column is assumed to be named 'target'; fail early and
    # clearly instead of deep inside DataFrame.drop().
    target = 'target'
    if target not in df.columns:
        raise KeyError(
            f"required column '{target}' not found in the uploaded data"
        )

    # predict_proba()[:, 1] and roc_curve() below only make sense for a
    # binary problem, so reject anything else before training.
    n_classes = df[target].nunique()
    if n_classes != 2:
        raise ValueError(
            f"column '{target}' must contain exactly 2 classes for ROC/AUC, "
            f"found {n_classes}"
        )

    # Label-encode categorical (object-dtype) columns; this also covers the
    # target column when it holds strings.
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Separate the features from the target variable.
    X = df.drop(columns=[target])
    y = df[target]

    # Split into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train the random-forest classifier.
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # Predict on the test set. Column 1 of predict_proba is the probability
    # of clf.classes_[1], which is treated as the positive class.
    y_pred = clf.predict(X_test)
    y_pred_prob = clf.predict_proba(X_test)[:, 1]

    # Confusion matrix and per-class precision/recall/F1 report.
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # ROC curve and AUC. pos_label is passed explicitly so that targets not
    # coded as {0, 1} / {-1, 1} (e.g. 1/2) are scored against the same class
    # whose probability was taken above.
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob, pos_label=clf.classes_[1])
    roc_auc = auc(fpr, tpr)

    return df, conf_matrix, classification_rep, fpr, tpr, roc_auc

def _render_and_close(fig):
    """Render *fig* on the Streamlit page, then close it."""
    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is
        # closed explicitly, so figures would otherwise pile up each time
        # main() runs.
        plt.close(fig)


def main():
    """Build the Streamlit page: upload a CSV and display the evaluation.

    Once a file is uploaded it is passed to ``process_and_evaluate`` and the
    page shows the classification report, a confusion-matrix heatmap, a
    correlation-matrix heatmap of the encoded data, and the ROC curve with
    its AUC.
    """
    st.title("心臟病預測")
    st.write("上傳包含心臟病數據的CSV文件，以獲取分類報告、混淆矩陣、相關矩陣、ROC曲線和AUC。")

    uploaded_file = st.file_uploader("選擇一個CSV文件", type="csv")
    if uploaded_file is None:
        # Nothing to show until the user provides a file.
        return

    df, conf_matrix, classification_rep, fpr, tpr, roc_auc = process_and_evaluate(uploaded_file)

    # Classification report (plain text).
    st.subheader("分類報告")
    st.text(classification_rep)

    # Confusion matrix heatmap.
    st.subheader("混淆矩陣")
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt='d', ax=ax, cmap='Blues')
    ax.set_title("混淆矩陣")
    ax.set_xlabel('預測標籤')
    ax.set_ylabel('實際標籤')
    _render_and_close(fig)

    # Correlation matrix of the returned (label-encoded) DataFrame.
    st.subheader("相關矩陣")
    corr_matrix = df.corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title("相關矩陣")
    ax.set_xlabel('特徵')
    ax.set_ylabel('特徵')
    _render_and_close(fig)

    # ROC curve with the AUC in the legend; a larger figure for readability.
    st.subheader("ROC曲線和AUC")
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC曲線 (AUC = {roc_auc:.2f})')
    # Diagonal reference line (TPR == FPR).
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('假陽性率', fontsize=14)
    ax.set_ylabel('真正例率', fontsize=14)
    ax.set_title('接收者操作特徵曲線 (ROC)', fontsize=16)
    ax.legend(loc="lower right", fontsize=12)
    ax.grid(True, linestyle='--', linewidth=0.5)
    _render_and_close(fig)

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()