import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.datasets import load_iris, fetch_california_housing
import base64
# Set page config
# Image locations are named here so the two calls below read as plain
# configuration. The page configuration call stays ahead of every other
# st.* call in this script.
PAGE_ICON_URL = "https://i.imgur.com/C6lAamP.png"
SIDEBAR_LOGO_URL = "https://i.imgur.com/kpkwAUT.png"

st.set_page_config(
    page_title="Machine Learning App",
    page_icon=PAGE_ICON_URL,
    layout="wide",
    initial_sidebar_state="expanded",
)

# Sidebar logo
st.sidebar.image(SIDEBAR_LOGO_URL, use_column_width=True)
#function to load a bg image
def set_bg_hack_url():
    """Send the background-styling snippet to the page.

    The snippet is rendered through ``st.markdown`` with
    ``unsafe_allow_html=True`` so that raw HTML/CSS would be honoured.

    NOTE(review): in this file the snippet consists of a single newline,
    so no background is actually applied -- confirm whether the style
    markup was lost and needs to be restored.

    Returns
    -------
    None
    """
    background_markup = "\n"
    st.markdown(background_markup, unsafe_allow_html=True)
# Call the function to set background image
set_bg_hack_url()

# Custom CSS styles with base64-encoded background image
# NOTE(review): the style payload is just a newline in this file, so this
# call currently injects nothing visible -- confirm whether CSS is missing.
custom_css = "\n"
st.markdown(custom_css, unsafe_allow_html=True)
# Function to build regression model
def build_regression_model(df, algorithm, hyperparameters):
    """Fit a regressor on ``df`` and render its evaluation in the app.

    The last column of ``df`` is taken as the target and every preceding
    column as a feature. Rows are split 80/20 with a fixed
    ``random_state`` of 42, the estimator is fitted on the training part,
    and R^2 plus mean squared error are shown for both parts, followed by
    the estimator's parameters.

    Parameters
    ----------
    df : pandas.DataFrame
        Data set whose last column is the regression target.
    algorithm : callable
        Called as ``algorithm(**hyperparameters)``; the result must offer
        ``fit``, ``predict`` and ``get_params``.
    hyperparameters : dict
        Keyword arguments forwarded to ``algorithm``.

    Returns
    -------
    The fitted estimator.
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]

    # Data splitting
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    st.markdown('**1.2. Data splits**')
    for caption, part in (('Training set', X_train), ('Test set', X_test)):
        st.write(caption)
        st.info(part.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable')
    st.info(list(features.columns))
    st.write('Y variable')
    st.info(target.name)

    model = algorithm(**hyperparameters)
    model.fit(X_train, y_train)

    # Model performance: the same two metrics are reported for each split.
    st.subheader('2. Model Performance')
    evaluation_sets = (
        ('**2.1. Training set**', X_train, y_train),
        ('**2.2. Test set**', X_test, y_test),
    )
    for heading, inputs, expected in evaluation_sets:
        st.markdown(heading)
        predicted = model.predict(inputs)
        st.write('Coefficient of determination ($R^2$):')
        st.info(r2_score(expected, predicted))
        # The value shown is always the mean squared error, so the label
        # names that metric only.
        st.write('Error (MSE):')
        st.info(mean_squared_error(expected, predicted))

    # Model parameters
    st.subheader('3. Model Parameters')
    st.write(model.get_params())
    return model
# Function to build classification model
def build_classification_model(df, algorithm, hyperparameters):
    """Fit a classifier on ``df`` and render its evaluation in the app.

    The last column of ``df`` is taken as the class label and every
    preceding column as a feature. Rows are split 80/20 with a fixed
    ``random_state`` of 42, the estimator is fitted on the training part,
    and the accuracy score is shown for both parts, followed by the
    estimator's parameters.

    Parameters
    ----------
    df : pandas.DataFrame
        Data set whose last column is the classification target.
    algorithm : callable
        Called as ``algorithm(**hyperparameters)``; the result must offer
        ``fit``, ``predict`` and ``get_params``.
    hyperparameters : dict
        Keyword arguments forwarded to ``algorithm``.

    Returns
    -------
    The fitted estimator.
    """
    features = df.iloc[:, :-1]
    labels = df.iloc[:, -1]

    # Data splitting
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    st.markdown('**1.2. Data splits**')
    for caption, part in (('Training set', X_train), ('Test set', X_test)):
        st.write(caption)
        st.info(part.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable')
    st.info(list(features.columns))
    st.write('Y variable')
    st.info(labels.name)

    model = algorithm(**hyperparameters)
    model.fit(X_train, y_train)

    # Model performance: accuracy is reported for each split in turn.
    st.subheader('2. Model Performance')
    evaluation_sets = (
        ('**2.1. Training set**', X_train, y_train),
        ('**2.2. Test set**', X_test, y_test),
    )
    for heading, inputs, expected in evaluation_sets:
        st.markdown(heading)
        predicted = model.predict(inputs)
        st.write('Accuracy Score:')
        st.info(accuracy_score(expected, predicted))

    # Model parameters
    st.subheader('3. Model Parameters')
    st.write(model.get_params())
    return model
# Disclaimer
# The text spans several lines, so it is written as a triple-quoted literal;
# a single-quoted literal cannot contain raw line breaks.
# NOTE(review): unsafe_allow_html is enabled but the text holds no HTML --
# confirm whether wrapper markup is supposed to surround it.
st.markdown(
    """
Disclaimer: This app works best when viewed in dark mode.
Click three dots in the top right corner -> Settings -> Choose app theme dark.
""",
    unsafe_allow_html=True,
)
# The Machine Learning App
# Title and one-line usage hint for the main panel, written as Markdown.
app_heading = """
# Machine Learning App
Select the problem type, dataset, model, and hyperparameters.
"""
st.write(app_heading)
# Sidebar - Select problem type
problem_type = st.sidebar.selectbox("Select Problem Type", ["Regression", "Classification"])

# Sidebar - Select dataset or upload CSV file
# Each problem type maps to the sidebar heading and the dataset choices
# offered for it.
dataset_menus = {
    "Classification": (
        "### Select Dataset or Upload CSV File for Classification",
        ["Iris", "Upload CSV File"],
    ),
    "Regression": (
        "### Select Dataset or Upload CSV File for Regression",
        ["California Housing", "Upload CSV File"],
    ),
}
if problem_type in dataset_menus:
    menu_heading, menu_choices = dataset_menus[problem_type]
    st.sidebar.write(menu_heading)
    dataset_option = st.sidebar.selectbox("Select Dataset", menu_choices)
# Load dataset
# df stays None until a data set is available; the main panel checks for
# that and asks the user to pick or upload one.
df = None
if dataset_option == "Iris":
    iris = load_iris()
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df["target"] = iris.target
elif dataset_option == "California Housing":
    housing = fetch_california_housing()
    df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
    df["target"] = housing.target
elif dataset_option == "Upload CSV File":
    uploaded_file = st.sidebar.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # An empty, malformed or wrongly encoded upload is reported in
            # the sidebar instead of aborting the script; df remains None.
            st.sidebar.error(f"Could not read the uploaded CSV file: {exc}")
# Sidebar - Select model and hyperparameters
# Every branch leaves two names behind for the main panel: `algorithm`
# (the estimator class) and `hyperparameters` (its keyword arguments).
st.sidebar.write("### Select Model and Hyperparameters")

if problem_type == "Regression":
    regression_algorithm = st.sidebar.selectbox(
        "Select Regression Algorithm",
        [
            "Linear Regression",
            "Ridge Regression",
            "Lasso Regression",
            "Random Forest Regression",
            "Gradient Boosting Regression",
        ],
    )
    if regression_algorithm == "Linear Regression":
        algorithm, hyperparameters = LinearRegression, {}
    elif regression_algorithm in ("Ridge Regression", "Lasso Regression"):
        # Ridge and Lasso expose the same single "Alpha" slider.
        alpha = st.sidebar.slider("Alpha", 0.0, 1.0, 0.5, 0.01)
        algorithm = Ridge if regression_algorithm == "Ridge Regression" else Lasso
        hyperparameters = {"alpha": alpha}
    elif regression_algorithm == "Random Forest Regression":
        n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
        max_features = st.sidebar.selectbox("Max Features", ["sqrt", "log2"])
        algorithm = RandomForestRegressor
        hyperparameters = dict(n_estimators=n_estimators, max_features=max_features)
    elif regression_algorithm == "Gradient Boosting Regression":
        n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
        learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1, 0.01)
        algorithm = GradientBoostingRegressor
        hyperparameters = dict(n_estimators=n_estimators, learning_rate=learning_rate)
elif problem_type == "Classification":
    classification_algorithm = st.sidebar.selectbox(
        "Select Classification Algorithm",
        [
            "Logistic Regression",
            "Support Vector Classifier",
            "Random Forest Classifier",
            "Gradient Boosting Classifier",
        ],
    )
    if classification_algorithm == "Logistic Regression":
        # No widget here: the iteration cap is fixed at 1000.
        algorithm, hyperparameters = LogisticRegression, {"max_iter": 1000}
    elif classification_algorithm == "Support Vector Classifier":
        kernel = st.sidebar.selectbox("Kernel", ["linear", "poly", "rbf", "sigmoid"])
        algorithm = SVC
        hyperparameters = dict(kernel=kernel)
    elif classification_algorithm == "Random Forest Classifier":
        n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
        max_features = st.sidebar.selectbox("Max Features", ["sqrt", "log2"])
        algorithm = RandomForestClassifier
        hyperparameters = dict(n_estimators=n_estimators, max_features=max_features)
    elif classification_algorithm == "Gradient Boosting Classifier":
        n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
        learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1, 0.01)
        algorithm = GradientBoostingClassifier
        hyperparameters = dict(n_estimators=n_estimators, learning_rate=learning_rate)
# Main panel
st.write("## Results")

if df is None:
    # Nothing to show until a data set has been chosen or uploaded.
    st.info("Please upload a CSV file or select an example dataset.")
else:
    st.write("### Data Preview")
    st.write("First 20 rows of the dataset:")
    st.write(df.head(20))  # Displaying the first 20 rows of the dataset

    # Each problem type maps to the builder that trains and reports the
    # model, and to the heading written after the builder has run.
    model_builders = {
        "Regression": (build_regression_model, "### Regression Model"),
        "Classification": (build_classification_model, "### Classification Model"),
    }
    if problem_type in model_builders:
        build_model, section_heading = model_builders[problem_type]
        model = build_model(df, algorithm, hyperparameters)
        st.write(section_heading)
        st.write("Model:", algorithm.__name__)
# Text in the top-right corner
# The literal is kept on one line with an explicit "\n"; a single-quoted
# string cannot be broken across physical lines.
# NOTE(review): the string carries no positioning markup, so where it is
# actually placed depends on styles defined elsewhere -- confirm.
st.markdown("Created by - RAJDEEP CHAKRAVORTY\n", unsafe_allow_html=True)