"""Streamlit app: fit a random-forest regressor on scikit-learn's diabetes data.

The page lets the user pick the test-split percentage and two forest
hyperparameters, then trains the model on demand and reports the mean
squared error, the R² score, and an actual-vs-predicted scatter plot.
"""

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Single seed shared by the train/test split and the forest so that the same
# widget settings always reproduce the same results.
RANDOM_STATE = 42

# Title
st.title("Diabetes Prediction using Random Forest")

# Load dataset: features as a DataFrame with named columns, target as a Series.
data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="Disease Progression")

# Optionally show the first rows of the feature table.
if st.checkbox("Show raw data"):
    st.subheader("Raw Data")
    st.write(X.head())

# Train-test split: the slider is a whole-number percentage (10-50, default
# 20), converted to the fraction train_test_split expects.
test_size = st.slider("Test Size (%)", 10, 50, 20)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size / 100,
    random_state=RANDOM_STATE,
)

# Hyperparameters
n_estimators = st.slider("Number of Trees (n_estimators)", 10, 200, 100)
max_depth = st.slider("Max Depth", 1, 30, 10)

# Train and evaluate only when the button is pressed.
if st.button("Train Model"):
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=RANDOM_STATE,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    st.subheader("Evaluation Metrics")
    st.write(f"Mean Squared Error: {mse:.2f}")
    st.write(f"R² Score: {r2:.2f}")

    # Plot actual vs predicted, with a dashed red y = x reference line drawn
    # across the range of the actual test targets.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, alpha=0.7)
    target_range = [y_test.min(), y_test.max()]
    ax.plot(target_range, target_range, "r--")
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Actual vs Predicted")
    st.pyplot(fig)

    # Figures created through pyplot stay registered in matplotlib's global
    # state until explicitly closed. Streamlit re-runs this script on every
    # interaction, so without this each click would leave another figure
    # open in memory.
    plt.close(fig)