Spaces:

Dhanush7080
/

AutoMl-DataScience-Workflow

Sleeping

App Files Files Community

AutoMl-DataScience-Workflow / app.py

Dhanush7080

Rename automl_app.py to app.py

a84089e verified over 1 year ago

Raw

History Blame Contribute Delete

3.99 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
	from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
	from sklearn.preprocessing import LabelEncoder
	import os
	import pickle

	# Streamlit "Auto ML" app: upload a CSV, inspect it, label-encode and impute
	# it, pick features, train a random forest, and download the fitted model.
	# Streamlit re-executes this script from the top on every widget interaction.


	def _encode_and_impute(df: pd.DataFrame) -> pd.DataFrame:
		"""Label-encode object columns and fill remaining gaps with medians.

		Object columns are cast to ``str`` before encoding, so a missing entry in
		such a column is encoded as its own category rather than imputed.
		"""
		# Handle categorical data
		st.write("### Categorical Encoding")
		for col in df.select_dtypes(include=['object']).columns:
			st.write(f"Encoding column: {col}")
			df[col] = LabelEncoder().fit_transform(df[col].astype(str))

		# Handle missing values
		st.write("### Handle Missing Values")
		# numeric_only=True keeps median() from raising on columns that are
		# neither numeric nor object (e.g. categorical dtype), which were not
		# label-encoded above.
		df = df.fillna(df.median(numeric_only=True))
		st.write("Missing values filled with column medians.")
		return df


	def _train_and_report(problem_type: str, X_train, X_test, y_train, y_test):
		"""Fit a random forest for ``problem_type`` and display test-set metrics.

		Returns the fitted model, or ``None`` if scikit-learn rejected the data
		(for example a continuous target with the classifier); in that case the
		error message is shown in the app instead of a traceback.
		"""
		is_classification = problem_type == "Classification"
		if is_classification:
			model = RandomForestClassifier(random_state=42)
		else:
			model = RandomForestRegressor(random_state=42)

		try:
			model.fit(X_train, y_train)
		except ValueError as err:
			st.error(f"Model training failed: {err}")
			return None

		predictions = model.predict(X_test)

		# Performance metrics
		if is_classification:
			accuracy = accuracy_score(y_test, predictions)
			f1 = f1_score(y_test, predictions, average='weighted')
			st.write(f"Accuracy: {accuracy:.2f}")
			st.write(f"F1 Score: {f1:.2f}")
		else:
			mae = mean_absolute_error(y_test, predictions)
			rmse = np.sqrt(mean_squared_error(y_test, predictions))
			r2 = r2_score(y_test, predictions)
			st.write(f"Mean Absolute Error: {mae:.2f}")
			st.write(f"Root Mean Squared Error: {rmse:.2f}")
			st.write(f"R2 Score: {r2:.2f}")
		return model


	def _offer_model_download(model) -> None:
		"""Show the controls that let the user download the pickled model."""
		st.write("### Save Trained Model")
		if st.button("Download Model"):
			# Serialise in memory: no fixed-name file in the working directory
			# (which concurrent sessions would overwrite) and no file handle
			# left open for the download widget.
			st.download_button(
				label="Download Trained Model",
				data=pickle.dumps(model),
				file_name="trained_model.pkl",
			)


	def _run_workflow(uploaded_file) -> None:
		"""Run profiling, preprocessing, training and export for one uploaded CSV."""
		st.write("Debug: File uploaded successfully!")

		# Load the dataset
		df = pd.read_csv(uploaded_file)
		st.write("### Data Preview")
		st.dataframe(df.head())

		# Data profiling
		st.write("### Dataset Summary")
		st.write(df.describe())

		# Missing values
		st.write("### Missing Value Analysis")
		st.write(df.isnull().sum())

		# Feature engineering
		st.write("### Feature Engineering")
		st.write("Choose the target column for model training:")
		target_column = st.selectbox("Target Column", list(df.columns))

		df = _encode_and_impute(df)

		# Column selection
		st.write("### Select Features")
		all_columns = list(df.columns)
		selected_columns = st.multiselect("Select columns to keep for training", options=all_columns, default=all_columns)
		df = df[selected_columns]
		st.write("Updated dataset:")
		st.dataframe(df.head())

		# Validate the selection before splitting; each of these cases would
		# otherwise surface as a raw exception further down.
		if target_column not in selected_columns:
			st.error(f"The target column '{target_column}' must be kept in the selected columns.")
			return
		feature_columns = [col for col in selected_columns if col != target_column]
		if not feature_columns:
			st.error("Select at least one feature column in addition to the target column.")
			return
		if len(df) < 2:
			st.error("The dataset needs at least two rows to create a train-test split.")
			return

		# Split dataset into train and test
		st.write("### Train-Test Split")
		test_percent = st.slider("Select test size (in %)", min_value=10, max_value=50, value=20, step=5)
		test_size = test_percent / 100

		X = df[feature_columns]
		y = df[target_column]
		X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

		st.write("Train and test data prepared.")

		# Model selection and training
		st.write("### Model Training")
		problem_type = st.radio("Choose the type of problem:", ["Classification", "Regression"])
		model = _train_and_report(problem_type, X_train, X_test, y_train, y_test)
		if model is None:
			return

		# Save the model
		_offer_model_download(model)


	# App title
	st.title("Advanced Data Science Workflow with Auto ML 🚀📊")
	st.write("Automate your data science workflow with feature engineering, model training, and deployment!")

	# File upload
	uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
	if uploaded_file is not None:
		_run_workflow(uploaded_file)

	# Footer
	st.write("---")
	st.write("Developed by Dhanush. Empowering data scientists worldwide!")