# ModelTrain / app.py
# saherPervaiz's picture
# Update app.py
# ceaabd5 verified
# raw
# history blame
# 7.17 kB
import io

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Streamlit app: upload a CSV, train several scikit-learn classifiers on it,
# and offer the resulting metrics table for download in several formats.

# Column order of the metrics table (also used when no model could be scored).
METRIC_COLUMNS = ["Model", "Accuracy", "Precision", "Recall", "F1-Score"]


def is_classification_target(y, max_numeric_classes=10):
    """Return True when *y* looks like a class label rather than a measurement.

    Non-numeric targets are always treated as labels; numeric targets are
    treated as labels only when they take at most *max_numeric_classes*
    distinct values.
    """
    # is_numeric_dtype (rather than ``dtype == 'object'``) also recognises
    # string/categorical dtypes that are not plain ``object``.
    if not pd.api.types.is_numeric_dtype(y):
        return True
    return y.nunique(dropna=False) <= max_numeric_classes


def prepare_features(df, target):
    """Split *df* into a numeric feature frame and the target series.

    Rows whose target is missing are dropped. Feature columns that are
    entirely empty are dropped. Missing numeric values are replaced with the
    column median, and non-numeric columns are replaced by integer codes in
    order of first appearance (missing values get the code -1).

    Returns:
        (X, y): the encoded feature DataFrame and the target Series, sharing
        the same index.
    """
    data = df.dropna(subset=[target])
    y = data[target]
    raw_features = data.drop(columns=[target]).dropna(axis=1, how="all")

    encoded = {}
    for name in raw_features.columns:
        column = raw_features[name]
        if pd.api.types.is_numeric_dtype(column):
            if column.isna().any():
                column = column.fillna(column.median())
        else:
            # The estimators used below need numbers, not strings.
            codes, _ = pd.factorize(column)
            column = pd.Series(codes, index=column.index)
        encoded[name] = column
    return pd.DataFrame(encoded, index=raw_features.index), y


def evaluate_classifiers(X_train, X_test, y_train, y_test):
    """Fit every classifier and score it on the held-out split.

    Returns:
        (metrics_df, failures): a DataFrame with one row per successfully
        trained model (columns ``METRIC_COLUMNS``, values rounded to two
        decimals) and a dict mapping the name of each model that raised
        ``ValueError`` to the error message.
    """
    classifiers = {
        'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
        'Decision Tree': DecisionTreeClassifier(),
        'Random Forest': RandomForestClassifier(),
        'Support Vector Machine (SVM)': SVC(),
        'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
        'Naive Bayes': GaussianNB(),
    }

    rows = []
    failures = {}
    for name, classifier in classifiers.items():
        try:
            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
        except ValueError as exc:
            # One unsuitable model should not take the whole report down.
            failures[name] = str(exc)
            continue

        # zero_division=0: a class that is never predicted must not be
        # scored as if it had been predicted perfectly.
        rows.append({
            'Model': name,
            'Accuracy': round(accuracy_score(y_test, y_pred), 2),
            'Precision': round(precision_score(y_test, y_pred, zero_division=0, average='macro'), 2),
            'Recall': round(recall_score(y_test, y_pred, zero_division=0, average='macro'), 2),
            'F1-Score': round(f1_score(y_test, y_pred, zero_division=0, average='macro'), 2),
        })
    return pd.DataFrame(rows, columns=METRIC_COLUMNS), failures


def excel_bytes(df):
    """Return *df* serialized as an .xlsx workbook (bytes).

    ``DataFrame.to_excel`` writes to a target and returns ``None``, so the
    workbook is written into an in-memory buffer that is then read back.
    """
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)
    return buffer.getvalue()


def generate_pdf(df):
    """Render *df* as a simple bordered table in a one-page PDF and return bytes."""
    # Imported lazily so the dependency is only needed when a PDF is built.
    from fpdf import FPDF

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Width 0 stretches the title cell to the right margin.
    pdf.cell(0, 10, "Model Performance Report", ln=True, align="C")
    pdf.ln(10)

    # Size the columns from the printable width so the table stays on the
    # page; the first column gets a larger share because it holds the
    # (long) model names.
    printable = pdf.w - pdf.l_margin - pdf.r_margin
    column_count = len(df.columns)
    if column_count > 1:
        first_width = printable * 0.4
        other_width = (printable - first_width) / (column_count - 1)
        widths = [first_width] + [other_width] * (column_count - 1)
    else:
        widths = [printable] * column_count

    # Table header
    pdf.set_font("Arial", style='B', size=10)
    for width, header in zip(widths, df.columns):
        pdf.cell(width, 10, str(header), border=1)
    pdf.ln()

    # Table rows
    pdf.set_font("Arial", size=10)
    for row in df.itertuples(index=False, name=None):
        for width, value in zip(widths, row):
            pdf.cell(width, 10, str(value), border=1)
        pdf.ln()

    rendered = pdf.output(dest='S')
    # Legacy PyFPDF returns a latin-1 str here; fpdf2 returns a bytearray.
    if isinstance(rendered, str):
        return rendered.encode('latin1')
    return bytes(rendered)


def table_png_bytes(df):
    """Draw *df* as a matplotlib table and return the image as PNG bytes."""
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        ax.axis('tight')
        ax.axis('off')
        table = ax.table(
            cellText=df.values,
            colLabels=df.columns.tolist(),
            loc='center',
            cellLoc='center',
            colLoc='center',
        )
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1.2, 1.2)

        # Render to memory rather than to a fixed file name in the working
        # directory.
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png', bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure; otherwise every run of the script
        # leaves another open figure behind.
        plt.close(fig)
    return buffer.getvalue()


def _render_downloads(metrics_df, df):
    """Show the download buttons for the metrics report and the dataset."""
    st.subheader("Download Model Performance Report in Different Formats")

    st.download_button(
        label="Download as CSV",
        data=metrics_df.to_csv(index=False),
        file_name="model_report.csv",
        mime="text/csv",
    )
    st.download_button(
        label="Download as Excel",
        data=excel_bytes(metrics_df),
        file_name="model_report.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
    st.download_button(
        label="Download as JSON",
        data=metrics_df.to_json(orient='records'),
        file_name="model_report.json",
        mime="application/json",
    )
    st.download_button(
        label="Download as PDF",
        data=generate_pdf(metrics_df),
        file_name="model_report.pdf",
        mime="application/pdf",
    )
    st.download_button(
        label="Download Dataset",
        data=df.to_csv(index=False),
        file_name="dataset.csv",
        mime="text/csv",
    )

    st.subheader("Download Report as PNG")
    st.download_button(
        label="Download as PNG",
        data=table_png_bytes(metrics_df),
        file_name="model_report.png",
        mime="image/png",
    )


def main():
    """Render the page: upload, target selection, training, and downloads."""
    st.title("Model Training with Metrics")
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        # A zero-byte upload cannot even be parsed into an empty frame.
        st.warning("The dataset is empty. Please upload a valid CSV file.")
        return

    st.write("Dataset:")
    st.dataframe(df)

    st.subheader("Model Training")
    if df.empty:
        st.warning("The dataset is empty. Please upload a valid CSV file.")
        return

    target = st.selectbox("Select Target Variable", df.columns)
    X, y = prepare_features(df, target)

    # Need at least one feature column and enough rows to split.
    if X.shape[1] == 0 or len(y) < 2:
        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
        return

    if not is_classification_target(y):
        st.warning(
            "The selected target looks continuous. Only classifiers are trained here, "
            "so the results may not be meaningful."
        )

    train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=1 - train_size, random_state=42
        )
    except ValueError:
        # Raised when the requested split would leave one side empty.
        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
        return

    metrics_df, failures = evaluate_classifiers(X_train, X_test, y_train, y_test)
    for name, reason in failures.items():
        st.warning(f"{name} could not be trained: {reason}")
    if metrics_df.empty:
        st.error("None of the models could be trained on this dataset.")
        return

    st.subheader("Model Performance Metrics")
    st.dataframe(metrics_df)

    _render_downloads(metrics_df, df)


# `streamlit run` executes this file as the `__main__` module, so the page is
# rendered there, while a plain import only defines the helpers.
if __name__ == "__main__":
    main()