# ML_streamlit / app.py
# Roberta2024's picture
# Create app.py
# 7309be7 verified
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
import streamlit as st
def process_and_evaluate(file, target='target'):
    """Train and evaluate a random-forest classifier on a CSV dataset.

    The CSV is loaded with pandas, every object-dtype column (the target
    included, if it is textual) is replaced by integer codes from a
    ``LabelEncoder``, and the rows are split 80/20 into train and test sets
    with a fixed seed. A ``RandomForestClassifier`` is fit on the training
    part and scored on the held-out part.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        target: Name of the label column. Defaults to ``'target'``.

    Returns:
        A tuple ``(df, conf_matrix, classification_rep)`` where ``df`` is the
        loaded frame after label encoding, ``conf_matrix`` is the confusion
        matrix on the test split, and ``classification_rep`` is the text
        report produced by ``classification_report``.

    Raises:
        KeyError: If ``target`` is not a column of the loaded data.
    """
    # Load the dataset
    df = pd.read_csv(file)

    # Fail fast with a readable message instead of the opaque KeyError that
    # ``df.drop`` would raise later, after the encoding work was already done.
    if target not in df.columns:
        raise KeyError(
            f"Target column {target!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    # Encode categorical features so the classifier and the correlation
    # matrix computed by the caller only ever see numeric data.
    categorical_columns = df.select_dtypes(include=['object']).columns
    for col in categorical_columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Define the target and features
    X = df.drop(columns=[target])
    y = df[target]

    # Split the data into training and testing sets (seeded for repeatability)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train a RandomForestClassifier
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # Predict on the test set
    y_pred = clf.predict(X_test)

    # Compute the confusion matrix and the classification report
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    return df, conf_matrix, classification_rep
# Streamlit interface: upload a CSV, run the train/evaluate pipeline on it,
# then show the classification report, the feature correlation heatmap and
# the confusion matrix.
st.title("Heart Disease Prediction")
st.write("Upload a CSV file containing heart disease data to get the classification report.")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    try:
        df, conf_matrix, report = process_and_evaluate(uploaded_file)
    except (KeyError, ValueError) as exc:
        # KeyError: the expected target column is missing from the upload.
        # ValueError: pandas could not parse the file (its parser errors are
        # ValueError subclasses) or scikit-learn rejected the data.
        # Show a message in the page instead of a raw traceback.
        st.error(f"Could not evaluate the uploaded file: {exc}")
    else:
        # Display the classification report
        st.subheader("Classification Report")
        st.text(report)

        # Plot the correlation matrix
        st.subheader("Correlation Matrix")
        corr_matrix = df.corr()
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure it creates registered until it is closed;
        # close it so repeated script runs do not accumulate figures in memory.
        plt.close(fig)

        # Plot the confusion matrix
        st.subheader("Confusion Matrix")
        fig, ax = plt.subplots()
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        st.pyplot(fig)
        plt.close(fig)