# Diabetes / src/streamlit_app.py
# Mubeen161: "Update src/streamlit_app.py" (commit f186dcc, verified)
import streamlit as st
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
# Page heading shown at the top of the app.
st.title("Diabetes Prediction using Random Forest")

# Fetch the scikit-learn diabetes dataset and wrap it in pandas objects:
# X holds the feature columns, y the regression target.
data = load_diabetes()
feature_values, target_values = data.data, data.target
X = pd.DataFrame(feature_values, columns=data.feature_names)
y = pd.Series(target_values, name="Disease Progression")
# Optional preview: when the checkbox is ticked, display the leading rows of
# the feature table (DataFrame.head()) under a "Raw Data" subheading.
if st.checkbox("Show raw data"):
    st.subheader("Raw Data")
    st.write(X.head())
# User controls. Widgets are created in the same order as before, so the
# page layout is unchanged: test size, number of trees, max depth.
test_size = st.slider("Test Size (%)", min_value=10, max_value=50, value=20)
n_estimators = st.slider(
    "Number of Trees (n_estimators)", min_value=10, max_value=200, value=100
)
max_depth = st.slider("Max Depth", min_value=1, max_value=30, value=10)

# The slider yields a percentage; train_test_split expects a fraction.
# A fixed random_state keeps the partition identical across reruns.
test_fraction = test_size / 100
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_fraction,
    random_state=42,
)
# Train, evaluate and plot only when the "Train Model" button is pressed.
if st.button("Train Model"):
    # Forest size and depth come from the sliders; the fixed random_state
    # makes results reproducible for a given slider configuration.
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score the held-out predictions.
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    st.subheader("Evaluation Metrics")
    st.write(f"Mean Squared Error: {mse:.2f}")
    st.write(f"R² Score: {r2:.2f}")

    # Plot actual vs predicted
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, alpha=0.7)
    # Dashed red diagonal marks where predicted == actual, spanning the
    # range of the observed test targets.
    actual_min, actual_max = y_test.min(), y_test.max()
    ax.plot([actual_min, actual_max], [actual_min, actual_max], "r--")
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Actual vs Predicted")
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # without this, each button press would leave another figure in memory
    # for the lifetime of the server process.
    plt.close(fig)