Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score | |
| import streamlit as st | |
def process_and_evaluate(df, target='target'):
    """Train a random-forest classifier on ``df`` and produce evaluation artifacts.

    Object-dtype columns are label-encoded, the rows are split 80/20 with a
    fixed seed, a ``RandomForestClassifier`` is fitted on the training part,
    and the held-out part is used for the confusion matrix, the
    classification report and the ROC curve. A correlation heatmap of the
    encoded data and the ROC plot are written as PNG files.

    Args:
        df: Input DataFrame containing the feature columns and the ``target``
            column. It is not modified; encoding happens on a copy.
        target: Name of the label column. Defaults to ``'target'``.

    Returns:
        A 4-tuple ``(classification_rep, conf_matrix, correlation_path,
        roc_path)``: the text report from ``classification_report``, the array
        from ``confusion_matrix``, and the file names of the two saved plots.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
        ValueError: If the classifier was not trained on exactly two classes,
            since the ROC curve below is only defined for a binary target.
    """
    # Fail early with a readable message instead of deep inside ``df.drop``.
    if target not in df.columns:
        raise KeyError(f"target column {target!r} not found in the data")

    # Encode on a copy so the caller's DataFrame keeps its original values.
    df = df.copy()
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Define the target and features
    X = df.drop(columns=[target])
    y = df[target]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a RandomForestClassifier
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)
    if len(clf.classes_) != 2:
        raise ValueError(
            f"expected a binary target, but the training data has "
            f"{len(clf.classes_)} class(es) in column {target!r}"
        )

    # Predict on the test set; column 1 is the probability of clf.classes_[1].
    y_pred = clf.predict(X_test)
    y_prob = clf.predict_proba(X_test)[:, 1]

    # Compute the confusion matrix and classification report
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # NOTE(review): both plots are saved under fixed file names in the current
    # working directory, so concurrent sessions would overwrite each other.
    correlation_path = 'correlation_matrix.png'
    roc_path = 'roc_curve.png'

    # Calculate and plot the correlation matrix of the encoded data.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.heatmap(df.corr(), annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
        ax.set_title('相依矩陣')
        fig.savefig(correlation_path)
    finally:
        plt.close(fig)  # Always release the figure, even if saving fails

    # Compute ROC curve and AUC. pos_label is pinned to the class whose
    # probability was selected above, so labels other than {0, 1} also work.
    fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=clf.classes_[1])
    roc_auc = roc_auc_score(y_test, y_prob)

    # Plot ROC curve with the chance diagonal for reference.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(fpr, tpr, color='blue', label='ROC 曲線 (AUC = {:.2f})'.format(roc_auc))
        ax.plot([0, 1], [0, 1], color='red', linestyle='--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('假陽性率')
        ax.set_ylabel('真正率')
        ax.set_title('Receiver Operating Characteristic (ROC) 曲線')
        ax.legend(loc='lower right')
        fig.savefig(roc_path)
    finally:
        plt.close(fig)  # Always release the figure, even if saving fails

    return classification_rep, conf_matrix, correlation_path, roc_path
# Create the Streamlit app: page chrome first, then the CSV uploader.
st.set_page_config(page_title="心臟病預測系統", layout="wide")
st.title("心臟病預測系統")
st.markdown("<h5 style='text-align: center;'>上傳包含心臟病數據的 CSV 文件以獲取分類報告、相依矩陣和 ROC 曲線。</h5>", unsafe_allow_html=True)
uploaded_file = st.file_uploader("上傳 CSV 文件", type="csv")
if uploaded_file is not None:
    try:
        # Load the dataset directly from the uploaded file
        df = pd.read_csv(uploaded_file)
        # Process the data and generate reports.
        # NOTE(review): this rebinds the module-level name
        # `classification_report`, shadowing the sklearn function imported at
        # the top of the file. The name is kept here because code outside this
        # section may read it; consider renaming it file-wide.
        classification_report, conf_matrix, correlation_matrix_path, roc_curve_path = process_and_evaluate(df)
    except (KeyError, ValueError) as exc:
        # An unreadable CSV or one without the expected target column is a
        # user error: show it in the page instead of a raw traceback, and stop
        # this run because nothing below can be rendered without the results.
        st.error(f"無法處理上傳的檔案：{exc}")
        st.stop()
    st.subheader("分類報告")
    st.text_area("分類報告", classification_report, height=400)
    st.subheader("混淆矩陣")
    # Plot and display the confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])
    plt.ylabel('實際值')
    plt.xlabel('預測值')
    plt.title('混淆矩陣')
    plt.savefig('confusion_matrix.png')  # Save the confusion matrix as an image
    plt.close()  # Close the plot to free memory
    st.image('confusion_matrix.png')
    st.subheader("相依矩陣")
    st.image(correlation_matrix_path)
| st.subheader("ROC 曲 |