|
|
import pandas as pd |
|
|
import seaborn as sns |
|
|
import streamlit as st |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.preprocessing import LabelEncoder |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc |
|
|
|
|
|
def process_and_evaluate(file, target='target'):
    """Train a random forest on a CSV dataset and compute evaluation metrics.

    The CSV is loaded with ``pd.read_csv``, every object-dtype column is
    label-encoded, and the data is split 80/20 (``random_state=42``) into
    train and test sets. A ``RandomForestClassifier`` is fitted on the
    training split and evaluated on the test split.

    Args:
        file: Path or file-like object; passed straight to ``pd.read_csv``.
        target: Name of the column holding the class label. Defaults to
            ``'target'``.

    Returns:
        A tuple ``(df, conf_matrix, classification_rep, fpr, tpr, roc_auc)``:
        the label-encoded DataFrame (target column included), the confusion
        matrix and classification report for the test split, the ROC curve
        coordinates, and the area under that curve.

    Raises:
        ValueError: If ``target`` is not a column of the CSV, or if the
            training split does not contain exactly two classes (the ROC
            curve computed here is only defined for binary targets).
    """
    df = pd.read_csv(file)

    # Fail early with an actionable message rather than the opaque KeyError
    # that DataFrame.drop would raise further down.
    if target not in df.columns:
        raise ValueError(
            f"Target column {target!r} not found in the CSV; "
            f"available columns: {list(df.columns)}"
        )

    # Label-encode every object-dtype column in place (the target too, if it
    # is stored as text) so the classifier only sees numeric data.
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    X = df.drop(columns=[target])
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # predict_proba has one column per entry of clf.classes_; the ROC logic
    # below needs exactly two of them.
    if len(clf.classes_) != 2:
        raise ValueError(
            f"Expected a binary target, but column {target!r} has "
            f"{len(clf.classes_)} class(es) in the training split: "
            f"{list(clf.classes_)}"
        )

    y_pred = clf.predict(X_test)

    # Column 1 of predict_proba corresponds to clf.classes_[1]; pass that
    # label to roc_curve explicitly so targets not coded as {0, 1} or
    # {-1, 1} (e.g. {1, 2}) are handled instead of raising.
    positive_label = clf.classes_[1]
    y_pred_prob = clf.predict_proba(X_test)[:, 1]

    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    fpr, tpr, _ = roc_curve(y_test, y_pred_prob, pos_label=positive_label)
    roc_auc = auc(fpr, tpr)

    return df, conf_matrix, classification_rep, fpr, tpr, roc_auc
|
|
|
|
|
def _show_and_close(fig):
    """Render *fig* on the Streamlit page, then release it from pyplot."""
    st.pyplot(fig)
    # Figures created through plt.subplots stay registered with pyplot until
    # they are closed; without this they pile up across runs of main().
    plt.close(fig)


def main():
    """Streamlit entry point: upload a CSV and display the model evaluation.

    Shows the page title and description, waits for a CSV upload, then runs
    ``process_and_evaluate`` on it and renders the classification report,
    the confusion-matrix heatmap, the correlation-matrix heatmap of the
    encoded data, and the ROC curve with its AUC.
    """
    st.title("心臟病預測")
    st.write("上傳包含心臟病數據的CSV文件,以獲取分類報告、混淆矩陣、相關矩陣、ROC曲線和AUC。")

    uploaded_file = st.file_uploader("選擇一個CSV文件", type="csv")
    if uploaded_file is None:
        # Nothing uploaded yet; there is nothing to evaluate.
        return

    df, conf_matrix, classification_rep, fpr, tpr, roc_auc = process_and_evaluate(uploaded_file)

    # Classification report (plain text).
    st.subheader("分類報告")
    st.text(classification_rep)

    # Confusion matrix heatmap.
    st.subheader("混淆矩陣")
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt='d', ax=ax, cmap='Blues')
    # Set the title on the axes object itself instead of relying on
    # pyplot's implicit "current axes".
    ax.set_title("混淆矩陣")
    ax.set_xlabel('預測標籤')
    ax.set_ylabel('實際標籤')
    _show_and_close(fig)

    # Correlation matrix of the encoded DataFrame.
    st.subheader("相關矩陣")
    corr_matrix = df.corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title("相關矩陣")
    ax.set_xlabel('特徵')
    ax.set_ylabel('特徵')
    _show_and_close(fig)

    # ROC curve with the AUC in the legend, plus the diagonal reference line.
    st.subheader("ROC曲線和AUC")
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC曲線 (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('假陽性率', fontsize=14)
    ax.set_ylabel('真正例率', fontsize=14)
    ax.set_title('接收者操作特徵曲線 (ROC)', fontsize=16)
    ax.legend(loc="lower right", fontsize=12)
    ax.grid(True, linestyle='--', linewidth=0.5)
    _show_and_close(fig)
|
|
|
|
|
# Invoke main() only when this module is executed directly, not when it is
# imported by another module.
if __name__ == "__main__":
    main()
|
|
|