import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
import streamlit as st


def process_and_evaluate(file, target="target"):
    """Train and evaluate a random-forest classifier on a CSV dataset.

    The CSV is loaded with ``pd.read_csv``, every ``object``-dtype column
    (including the target, if it is one) is replaced by integer codes from a
    ``LabelEncoder``, and the rows are split 80/20 into train and test sets
    with a fixed seed. A ``RandomForestClassifier`` is fitted on the training
    part and scored on the held-out part.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).
        target: Name of the label column. Defaults to ``"target"``.

    Returns:
        A ``(df, conf_matrix, classification_rep)`` tuple: the label-encoded
        DataFrame, the confusion matrix of the test predictions, and the
        text classification report for the same predictions.

    Raises:
        KeyError: If ``target`` is not a column of the loaded data.
    """
    df = pd.read_csv(file)

    # Fail early with a readable message instead of the bare KeyError that
    # ``df.drop`` would raise further down.
    if target not in df.columns:
        available = ", ".join(map(str, df.columns))
        raise KeyError(
            f"Column '{target}' not found in the uploaded data. "
            f"Available columns: {available}"
        )

    # Encode categorical features as integer codes, one encoder per column.
    for col in df.select_dtypes(include=["object"]).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    X = df.drop(columns=[target])
    y = df[target]

    # Fixed random_state keeps the split and the forest reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    return df, conf_matrix, classification_rep


# Streamlit interface
st.title("Heart Disease Prediction")
st.write("Upload a CSV file containing heart disease data to get the classification report.")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    try:
        df, conf_matrix, report = process_and_evaluate(uploaded_file)
    except KeyError as exc:
        # args[0] is the plain message; str(exc) would wrap it in quotes.
        st.error(exc.args[0])
    else:
        # Display the classification report
        st.subheader("Classification Report")
        st.text(report)

        # Plot the correlation matrix of the encoded data
        st.subheader("Correlation Matrix")
        corr_matrix = df.corr()
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        st.pyplot(fig)
        # Figures made via pyplot stay registered until closed; the script
        # reruns on every interaction, so release each one after rendering.
        plt.close(fig)

        # Plot the confusion matrix
        st.subheader("Confusion Matrix")
        fig, ax = plt.subplots()
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        st.pyplot(fig)
        plt.close(fig)