"""Streamlit app: upload a CSV, prepare it, train a random forest, download the model.

The script runs top to bottom on every Streamlit rerun. The page header,
the file uploader and the footer are rendered at module level; everything
that depends on an uploaded file lives in ``_run_workflow``.
"""

import os
import pickle

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


def _encode_categoricals(df: pd.DataFrame) -> None:
    """Label-encode every object-dtype column of *df* in place, announcing each one."""
    # NOTE(review): astype(str) presumably turns missing values into a literal
    # string category, so they get encoded rather than imputed later --
    # confirm that this is intended.
    for col in df.select_dtypes(include=["object"]).columns:
        st.write(f"Encoding column: {col}")
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))


def _train_and_report(problem_type, X_train, X_test, y_train, y_test):
    """Fit a random forest for *problem_type* and write its test metrics.

    Returns the fitted model, or ``None`` when scikit-learn rejects the data
    (for example a continuous target with "Classification" selected); in
    that case the error is shown in the page instead of a traceback.
    """
    if problem_type == "Classification":
        model = RandomForestClassifier(random_state=42)
    else:
        model = RandomForestRegressor(random_state=42)

    try:
        model.fit(X_train, y_train)
    except ValueError as err:
        st.error(f"Model training failed: {err}")
        return None

    predictions = model.predict(X_test)

    if problem_type == "Classification":
        accuracy = accuracy_score(y_test, predictions)
        f1 = f1_score(y_test, predictions, average="weighted")
        st.write(f"Accuracy: {accuracy:.2f}")
        st.write(f"F1 Score: {f1:.2f}")
    else:
        mae = mean_absolute_error(y_test, predictions)
        rmse = np.sqrt(mean_squared_error(y_test, predictions))
        r2 = r2_score(y_test, predictions)
        st.write(f"Mean Absolute Error: {mae:.2f}")
        st.write(f"Root Mean Squared Error: {rmse:.2f}")
        st.write(f"R2 Score: {r2:.2f}")

    return model


def _run_workflow(uploaded_file) -> None:
    """Render the full workflow for one uploaded CSV file.

    Steps, in page order: preview, summary, missing-value report, target
    selection, categorical encoding, median imputation, feature selection,
    train/test split, model training with metrics, and model download.
    Returns early (after showing an error) when a step cannot proceed.
    """
    st.write("Debug: File uploaded successfully!")

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded CSV file: {err}")
        return

    st.write("### Data Preview")
    st.dataframe(df.head())

    st.write("### Dataset Summary")
    st.write(df.describe())

    st.write("### Missing Value Analysis")
    st.write(df.isnull().sum())

    st.write("### Feature Engineering")
    st.write("Choose the target column for model training:")
    target_column = st.selectbox("Target Column", df.columns)

    st.write("### Categorical Encoding")
    _encode_categoricals(df)

    st.write("### Handle Missing Values")
    # numeric_only=True keeps this from raising if a non-numeric column
    # (one that was not object-dtype and so was not encoded) is present.
    df = df.fillna(df.median(numeric_only=True))
    st.write("Missing values filled with column medians.")

    st.write("### Select Features")
    selected_columns = st.multiselect(
        "Select columns to keep for training", options=df.columns, default=df.columns
    )
    if target_column not in selected_columns:
        # Without the target, the drop/lookup below would raise KeyError.
        st.warning(
            f"Target column '{target_column}' is required for training and was kept."
        )
        selected_columns = [*selected_columns, target_column]
    df = df[selected_columns]
    st.write("Updated dataset:")
    st.dataframe(df.head())

    st.write("### Train-Test Split")
    test_percent = st.slider(
        "Select test size (in %)", min_value=10, max_value=50, value=20, step=5
    )
    test_size = test_percent / 100

    X = df.drop(columns=[target_column])
    y = df[target_column]
    if X.shape[1] == 0:
        st.error("Select at least one feature column besides the target.")
        return
    if len(df) < 2:
        st.error("At least two rows are needed to create a train/test split.")
        return
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )
    st.write("Train and test data prepared.")

    st.write("### Model Training")
    problem_type = st.radio(
        "Choose the type of problem:", ["Classification", "Regression"]
    )
    model = _train_and_report(problem_type, X_train, X_test, y_train, y_test)
    if model is None:
        return

    st.write("### Save Trained Model")
    if st.button("Download Model"):
        model_filename = "trained_model.pkl"
        # Serialize in memory: no file left in the server's working directory
        # (which would be shared between sessions) and no unclosed file handle.
        st.download_button(
            label="Download Trained Model",
            data=pickle.dumps(model),
            file_name=model_filename,
        )


st.title("Advanced Data Science Workflow with Auto ML ππ")
st.write("Automate your data science workflow with feature engineering, model training, and deployment!")

uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
if uploaded_file is not None:
    _run_workflow(uploaded_file)

st.write("---")
st.write("Developed by Dhanush. Empowering data scientists worldwide!")