Spaces:

Shubham-10000
/

No_code_ML_builder

Sleeping

No_code_ML_builder / src /model_training.py

Shubham 10000

update to main file

cb74654 6 months ago

4.66 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import os
	import joblib
	import matplotlib.pyplot as plt
	import sklearn
	import seaborn as sns
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import (
	accuracy_score, mean_squared_error, r2_score,
	classification_report, confusion_matrix
	)
	from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
	from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
	from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
	from sklearn.linear_model import LinearRegression

	import warnings
	warnings.filterwarnings("ignore")

	def model_training():
	    """Render the Streamlit page that trains, compares and saves models.

	    Workflow:
	      1. List the files in ``preprocessed_data`` and load the one the user picks.
	      2. Let the user choose the target column; every other column is a feature.
	      3. Decide between classification and regression from the target's dtype.
	      4. Hold out 20% of the rows (``random_state=42``) and fit every candidate
	         model with its default hyper-parameters.
	      5. Score each model (accuracy for classification, R² for regression,
	         both multiplied by 100) and show the comparison table.
	      6. Show detailed metrics for the highest-scoring model.
	      7. On request, dump that model to ``saved_models/`` with joblib and
	         offer it as a download.

	    Returns:
	        None. All output goes through Streamlit widgets; the function returns
	        early when no data is available or the selected file is too small.
	    """
	    st.title("Model Training & Evaluation")

	    # Step 1: Load preprocessed data
	    files = os.listdir('preprocessed_data') if os.path.exists('preprocessed_data') else []
	    if not files:
	        st.warning("No preprocessed data found. Please preprocess a file first.")
	        return

	    selected_file = st.selectbox("Select a preprocessed CSV file", files)
	    df = pd.read_csv(os.path.join('preprocessed_data', selected_file))
	    st.write("Data Preview:")
	    st.dataframe(df.head())

	    # Without at least one feature column and enough rows to split, the
	    # train/test split or the first fit() would raise a raw traceback.
	    if df.shape[1] < 2 or len(df) < 2:
	        st.error(
	            "The selected file needs at least two columns and two rows "
	            "to train a model."
	        )
	        return

	    # Step 2: Select target column
	    target_column = st.selectbox("Select Target Column", df.columns)
	    X = df.drop(columns=[target_column])
	    y = df[target_column]

	    # Step 3: Detect problem type
	    problem_type = _detect_problem_type(y)
	    is_classification = problem_type == "classification"
	    if is_classification:
	        st.info("Detected as Classification Problem")
	    else:
	        st.info("Detected as Regression Problem")

	    # Step 4: Train/Test Split
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    # Step 5: Define models
	    models = _candidate_models(problem_type)

	    # Step 6: Train, Evaluate, and Display Metrics
	    # Scores are kept on a 0-100 scale so both problem types read alike.
	    score_fn = accuracy_score if is_classification else r2_score
	    results = []
	    for name, model in models.items():
	        model.fit(X_train, y_train)
	        y_pred = model.predict(X_test)
	        score = score_fn(y_test, y_pred) * 100
	        results.append((name, score, model, y_pred))

	    # Step 7: Show model performance table
	    results_df = pd.DataFrame(
	        results, columns=["Model", "Score", "Trained_Model", "Predictions"]
	    )
	    st.write("Model Performance:")
	    st.dataframe(results_df[["Model", "Score"]])

	    # Step 8: Best Model Selection
	    best_model_row = results_df.loc[results_df["Score"].idxmax()]
	    st.success(f"Best Model: {best_model_row['Model']} with Score: {best_model_row['Score']:.4f}")
	    st.write("Best Model Details:", best_model_row)
	    best_model = best_model_row["Trained_Model"]

	    # Step 9: Detailed Metrics for Best Model
	    st.subheader("Detailed Metrics for Best Model")
	    best_y_pred = best_model_row["Predictions"]

	    if is_classification:
	        st.write("Accuracy Score:", accuracy_score(y_test, best_y_pred))
	        st.text("Classification Report:")
	        st.text(classification_report(y_test, best_y_pred))

	        # Confusion Matrix
	        cm = confusion_matrix(y_test, best_y_pred)
	        fig, ax = plt.subplots()
	        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
	        ax.set_xlabel("Predicted")
	        ax.set_ylabel("Actual")
	        st.pyplot(fig)
	    else:
	        # Report MSE and RMSE under their own names, in the target's units;
	        # neither is a percentage.
	        mse = mean_squared_error(y_test, best_y_pred)
	        st.write("Mean Squared Error:", mse)
	        st.write("Root Mean Squared Error:", np.sqrt(mse))
	        st.write("R² Score:", r2_score(y_test, best_y_pred))

	    # Step 10: Save & Download Best Model
	    if st.button("Save Best Model"):
	        st.write("Saving the best model...")
	        st.write(f"Model Name: {best_model_row['Model']}")
	        os.makedirs("saved_models", exist_ok=True)
	        model_filename = f"{best_model_row['Model'].replace(' ', '_')}.pkl"
	        model_path = os.path.join("saved_models", model_filename)
	        joblib.dump(best_model, model_path)

	        # Read the bytes inside a context manager so the handle is closed
	        # before the widget is rendered.
	        with open(model_path, "rb") as model_file:
	            model_bytes = model_file.read()
	        st.download_button(
	            label="Download Model",
	            data=model_bytes,
	            file_name=model_filename,
	        )


	def _detect_problem_type(y):
	    """Return "classification" or "regression" for the target Series *y*.

	    Boolean and non-numeric targets (strings, categoricals) are class labels.
	    Integer targets of any width count as classification when they have at
	    most 20 distinct values. Everything else is treated as regression.
	    """
	    dtype_checks = pd.api.types
	    # is_numeric_dtype() is True for bool, so test bool explicitly first.
	    if dtype_checks.is_bool_dtype(y) or not dtype_checks.is_numeric_dtype(y):
	        return "classification"
	    if dtype_checks.is_integer_dtype(y) and y.nunique() <= 20:
	        return "classification"
	    return "regression"


	def _candidate_models(problem_type):
	    """Return a name -> unfitted estimator mapping for *problem_type*."""
	    if problem_type == "classification":
	        return {
	            "Random Forest Classifier": RandomForestClassifier(),
	            "Decision Tree Classifier": DecisionTreeClassifier(),
	            "KNN Classifier": KNeighborsClassifier(),
	        }
	    return {
	        "Random Forest Regressor": RandomForestRegressor(),
	        "Decision Tree Regressor": DecisionTreeRegressor(),
	        "KNN Regressor": KNeighborsRegressor(),
	        "Linear Regression": LinearRegression(),
	    }