Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Files Community

ModelTrain / app.py

saherPervaiz

Update app.py

ceaabd5 verified over 1 year ago

raw

history blame

7.17 kB

	import io

	import matplotlib.pyplot as plt
	import pandas as pd
	import seaborn as sns
	import streamlit as st
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import GaussianNB
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.preprocessing import LabelEncoder
	from sklearn.svm import SVC
	from sklearn.tree import DecisionTreeClassifier

	# Streamlit app: upload a CSV, train a fixed set of classifiers on it, show
	# their metrics and offer the report for download in several formats.
	#
	# Streamlit re-executes this script on every widget interaction (including a
	# click on any download button), so everything below must be deterministic and
	# must not leak resources between runs.

	# Numeric targets with more distinct values than this are treated as continuous.
	MAX_CLASS_COUNT = 10

	# Shared seed so the split and the randomized models give the same numbers on
	# every rerun; otherwise reports downloaded one after another would disagree.
	RANDOM_STATE = 42

	METRIC_COLUMNS = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score']


	def is_classification_target(y):
	    """Return True when the target column *y* looks categorical.

	    A target is considered categorical when it is non-numeric or has at most
	    ``MAX_CLASS_COUNT`` distinct values.
	    """
	    return (not pd.api.types.is_numeric_dtype(y)) or len(y.unique()) <= MAX_CLASS_COUNT


	def encode_features(X):
	    """Return a numeric copy of the feature frame *X*.

	    - Columns that are entirely missing are dropped.
	    - Missing values in numeric columns are replaced by the column median.
	    - Non-numeric columns are replaced by integer codes (sorted by their string
	      form); missing values end up with a code of their own.

	    The input frame is not modified.
	    """
	    encoded = X.dropna(axis=1, how="all").copy()
	    for column in encoded.columns:
	        values = encoded[column]
	        if pd.api.types.is_numeric_dtype(values):
	            if values.isna().any():
	                encoded[column] = values.fillna(values.median())
	        else:
	            codes, _ = pd.factorize(values.astype(str), sort=True)
	            encoded[column] = codes
	    return encoded


	def encode_target(y):
	    """Return *y* as a Series of integer class codes with the same index.

	    Encoding keeps the metrics unchanged but lets the classifiers accept string
	    labels and non-integer floats. *y* must not contain missing values.
	    """
	    labels = y if pd.api.types.is_numeric_dtype(y) else y.astype(str)
	    codes, _ = pd.factorize(labels, sort=True)
	    return pd.Series(codes, index=y.index, name=y.name)


	def build_classifiers():
	    """Return a fresh ``{display name: unfitted estimator}`` mapping."""
	    return {
	        'Logistic Regression': LogisticRegression(
	            max_iter=5000, solver='saga', penalty='l1', random_state=RANDOM_STATE
	        ),
	        'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
	        'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
	        'Support Vector Machine (SVM)': SVC(),
	        'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
	        'Naive Bayes': GaussianNB(),
	    }


	def evaluate_classifiers(X_train, X_test, y_train, y_test):
	    """Fit every classifier and return a DataFrame with one metrics row each.

	    A model that cannot be fitted on the given split (scikit-learn raises
	    ``ValueError``, e.g. when the training part holds a single class) is
	    reported with a warning and skipped instead of aborting the whole report.
	    """
	    rows = []
	    for name, classifier in build_classifiers().items():
	        try:
	            classifier.fit(X_train, y_train)
	            y_pred = classifier.predict(X_test)
	        except ValueError as exc:
	            st.warning(f"Skipping {name}: {exc}")
	            continue

	        rows.append({
	            'Model': name,
	            'Accuracy': round(accuracy_score(y_test, y_pred), 2),
	            'Precision': round(
	                precision_score(y_test, y_pred, zero_division=1, average='macro'), 2
	            ),
	            'Recall': round(
	                recall_score(y_test, y_pred, zero_division=1, average='macro'), 2
	            ),
	            'F1-Score': round(
	                f1_score(y_test, y_pred, zero_division=1, average='macro'), 2
	            ),
	        })
	    # Explicit columns keep the header even when every model was skipped.
	    return pd.DataFrame(rows, columns=METRIC_COLUMNS)


	def excel_bytes(df):
	    """Return *df* serialized as an .xlsx workbook (bytes)."""
	    # DataFrame.to_excel needs a target to write to; use an in-memory buffer.
	    buffer = io.BytesIO()
	    df.to_excel(buffer, index=False, engine='openpyxl')
	    return buffer.getvalue()


	def generate_pdf(df):
	    """Return a PDF (bytes) containing a title and *df* rendered as a table."""
	    from fpdf import FPDF

	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_font("Arial", size=12)
	    # Width 0 stretches the cell to the right margin so the title is centered.
	    pdf.cell(0, 10, "Model Performance Report", ln=True, align="C")
	    pdf.ln(10)

	    # Fit the table to the printable width; the first column is wider because
	    # it holds the (long) model names.
	    usable_width = pdf.w - pdf.l_margin - pdf.r_margin
	    column_count = len(df.columns)
	    if column_count > 1:
	        first_width = usable_width * 0.4
	        other_width = (usable_width - first_width) / (column_count - 1)
	        widths = [first_width] + [other_width] * (column_count - 1)
	    else:
	        widths = [usable_width] * column_count

	    # Table header
	    pdf.set_font("Arial", style='B', size=10)
	    for width, header in zip(widths, df.columns):
	        pdf.cell(width, 10, str(header), border=1)
	    pdf.ln()

	    # Table rows
	    pdf.set_font("Arial", size=10)
	    for row in df.values:
	        for width, value in zip(widths, row):
	            pdf.cell(width, 10, str(value), border=1)
	        pdf.ln()

	    # PyFPDF returns a latin-1 str here, fpdf2 returns a bytearray.
	    raw = pdf.output(dest='S')
	    return raw.encode('latin1') if isinstance(raw, str) else bytes(raw)


	def png_bytes(metrics_df):
	    """Return *metrics_df* rendered as a table image (PNG bytes)."""
	    buffer = io.BytesIO()
	    fig, ax = plt.subplots(figsize=(12, 4))
	    try:
	        ax.axis('tight')
	        ax.axis('off')
	        table = ax.table(
	            cellText=metrics_df.values,
	            colLabels=metrics_df.columns.tolist(),
	            loc='center',
	            cellLoc='center',
	            colLoc='center',
	        )
	        table.auto_set_font_size(False)
	        table.set_fontsize(10)
	        table.scale(1.2, 1.2)
	        # Render in memory: a fixed file name on disk would be shared by all
	        # concurrent sessions of the app.
	        fig.savefig(buffer, format='png', bbox_inches='tight', dpi=300)
	    finally:
	        # pyplot keeps figures alive until closed; the script reruns often.
	        plt.close(fig)
	    return buffer.getvalue()


	def render_downloads(metrics_df, df):
	    """Render the download buttons for the metrics report and the dataset."""
	    st.subheader("Download Model Performance Report in Different Formats")

	    st.download_button(
	        label="Download as CSV",
	        data=metrics_df.to_csv(index=False),
	        file_name="model_report.csv",
	        mime="text/csv",
	    )
	    st.download_button(
	        label="Download as Excel",
	        data=excel_bytes(metrics_df),
	        file_name="model_report.xlsx",
	        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	    )
	    st.download_button(
	        label="Download as JSON",
	        data=metrics_df.to_json(orient='records'),
	        file_name="model_report.json",
	        mime="application/json",
	    )
	    st.download_button(
	        label="Download as PDF",
	        data=generate_pdf(metrics_df),
	        file_name="model_report.pdf",
	        mime="application/pdf",
	    )
	    st.download_button(
	        label="Download Dataset",
	        data=df.to_csv(index=False),
	        file_name="dataset.csv",
	        mime="text/csv",
	    )

	    st.subheader("Download Report as PNG")
	    st.download_button(
	        label="Download as PNG",
	        data=png_bytes(metrics_df),
	        file_name="model_report.png",
	        mime="image/png",
	    )


	def main():
	    """Run the app: upload, target selection, training, report, downloads."""
	    st.title("Model Training with Metrics")
	    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
	    if uploaded_file is None:
	        return

	    try:
	        df = pd.read_csv(uploaded_file)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError):
	        # read_csv raises on an empty or malformed file before df.empty can
	        # be checked.
	        st.warning("The dataset is empty. Please upload a valid CSV file.")
	        return

	    st.write("Dataset:")
	    st.dataframe(df)

	    st.subheader("Model Training")
	    if df.empty:
	        st.warning("The dataset is empty. Please upload a valid CSV file.")
	        return

	    target = st.selectbox("Select Target Variable", df.columns)
	    # Rows without a label cannot be used for training or scoring.
	    labelled = df.dropna(subset=[target])
	    X = encode_features(labelled.drop(columns=[target]))
	    y = labelled[target]

	    # At least one feature column and two rows are needed to split the data.
	    if X.shape[1] == 0 or len(labelled) < 2:
	        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
	        return

	    if not is_classification_target(y):
	        st.warning(
	            "The selected target has many distinct numeric values and looks "
	            "continuous. These models are classifiers, so every distinct value "
	            "is treated as a separate class."
	        )
	    y = encode_target(y)

	    train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=1 - train_size, random_state=RANDOM_STATE
	    )

	    metrics_df = evaluate_classifiers(X_train, X_test, y_train, y_test)
	    if metrics_df.empty:
	        st.error("None of the models could be trained on this dataset.")
	        return

	    st.subheader("Model Performance Metrics")
	    st.dataframe(metrics_df)

	    render_downloads(metrics_df, df)


	# Streamlit executes the script as "__main__"; the guard keeps a plain import
	# of this module free of UI side effects.
	if __name__ == "__main__":
	    main()