# Streamlit app: train a RandomForestRegressor on scikit-learn's diabetes
# dataset, then show evaluation metrics and an actual-vs-predicted scatter plot.
#
# Every statement here lives at module level; the model is only trained and
# evaluated inside the "Train Model" button branch at the bottom.

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# One seed shared by the train/test split and the forest so that results are
# reproducible for a given combination of slider values.
RANDOM_STATE = 42

# --- Page title -------------------------------------------------------------
st.title("Diabetes Prediction using Random Forest")

# --- Load dataset -----------------------------------------------------------
# Wrap the raw arrays in pandas objects so the feature columns keep their names.
data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="Disease Progression")

# --- Optional raw-data preview ----------------------------------------------
if st.checkbox("Show raw data"):
    st.subheader("Raw Data")
    st.write(X.head())

# --- Train/test split -------------------------------------------------------
# The slider is expressed in percent (10-50, default 20); train_test_split
# expects a fraction, hence the division by 100.
test_size = st.slider("Test Size (%)", 10, 50, 20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size / 100, random_state=RANDOM_STATE
)

# --- Model hyperparameters --------------------------------------------------
n_estimators = st.slider("Number of Trees (n_estimators)", 10, 200, 100)
max_depth = st.slider("Max Depth", 1, 30, 10)

# --- Train, evaluate and plot -----------------------------------------------
# Everything below depends on the fitted model, so it all stays inside the
# button branch and only runs on the rerun triggered by the click.
if st.button("Train Model"):
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=RANDOM_STATE,
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    st.subheader("Evaluation Metrics")
    st.write(f"Mean Squared Error: {mse:.2f}")
    st.write(f"R² Score: {r2:.2f}")

    # Scatter of actual vs predicted targets with a dashed y = x reference
    # line; points lying on that line are exact predictions.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, alpha=0.7)
    lo, hi = y_test.min(), y_test.max()
    ax.plot([lo, hi], [lo, hi], "r--")
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Actual vs Predicted")
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is explicitly
    # closed; close it once rendered so repeated clicks do not accumulate
    # figures in the long-running server process.
    plt.close(fig)