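# Spaces: Running
# NOTE: the line above is page-header residue from the web view this file was copied from; it is not part of the script.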
import io

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Streamlit app: upload a CSV, choose a target column, train a fixed set of
# classifiers on the remaining columns, and offer the resulting metrics table
# for download as CSV, Excel, JSON, PDF and PNG.


def _encode_features(X):
    """Return a copy of *X* with every non-numeric column label-encoded.

    The estimators used below require numeric input, so text/categorical
    columns are mapped to integer codes with ``LabelEncoder``. Values are
    converted to ``str`` first so mixed-type columns can be encoded too.
    """
    encoded = X.copy()
    for column in encoded.columns:
        if not pd.api.types.is_numeric_dtype(encoded[column]):
            encoded[column] = LabelEncoder().fit_transform(
                encoded[column].astype(str)
            )
    return encoded


def _is_continuous_target(y):
    """Return True when *y* is a float column holding non-integral values.

    Such a column is a regression target; passing it to a classifier's
    ``fit`` raises, so the caller reports it to the user instead.
    """
    return bool(pd.api.types.is_float_dtype(y) and not (y % 1 == 0).all())


def _evaluate_classifiers(classifiers, X_train, X_test, y_train, y_test):
    """Fit each classifier and return one metrics dict per successful model.

    A model that raises ``ValueError`` while fitting or predicting (for
    example k-NN with fewer training rows than neighbours) is reported with
    ``st.warning`` and skipped, so one failure does not hide the others.
    """
    results = []
    for name, classifier in classifiers.items():
        try:
            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
        except ValueError as exc:
            st.warning(f"Skipping {name}: {exc}")
            continue
        results.append({
            'Model': name,
            'Accuracy': round(accuracy_score(y_test, y_pred), 2),
            'Precision': round(
                precision_score(y_test, y_pred, zero_division=1, average='macro'), 2
            ),
            'Recall': round(
                recall_score(y_test, y_pred, zero_division=1, average='macro'), 2
            ),
            'F1-Score': round(
                f1_score(y_test, y_pred, zero_division=1, average='macro'), 2
            ),
        })
    return results


def _metrics_to_excel_bytes(metrics_df):
    """Serialize *metrics_df* to an in-memory ``.xlsx`` workbook (bytes).

    ``DataFrame.to_excel`` needs a destination and returns ``None``, so the
    workbook is written into a buffer whose contents are returned.
    """
    buffer = io.BytesIO()
    metrics_df.to_excel(buffer, index=False, engine='openpyxl')
    return buffer.getvalue()


def generate_pdf(df):
    """Render *df* as a bordered table in a PDF and return the PDF as bytes."""
    # Imported where it is used; fpdf is only needed for this export.
    from fpdf import FPDF

    headers = [str(col) for col in df.columns]
    rows = [[str(value) for value in row] for row in df.values]

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Width 0 stretches the cell to the right margin, so the title is centred
    # between the page margins.
    pdf.cell(0, 10, txt="Model Performance Report", ln=True, align="C")
    pdf.ln(10)

    pdf.set_font("Arial", style='B', size=10)
    # Size each column to its widest entry plus padding so long values (such
    # as model names) do not spill into the neighbouring cell.
    col_widths = [
        max(
            pdf.get_string_width(text)
            for text in [header, *(row[i] for row in rows)]
        ) + 6
        for i, header in enumerate(headers)
    ]

    # Add table header
    for header, width in zip(headers, col_widths):
        pdf.cell(width, 10, header, border=1)
    pdf.ln()

    # Add table rows
    pdf.set_font("Arial", size=10)
    for row in rows:
        for value, width in zip(row, col_widths):
            pdf.cell(width, 10, value, border=1)
        pdf.ln()

    output = pdf.output(dest='S')
    # PyFPDF returns a latin-1 ``str`` here, while fpdf2 returns a bytearray;
    # normalise both to ``bytes`` for the download button.
    if isinstance(output, str):
        return output.encode('latin1')
    return bytes(output)


def _metrics_table_png(metrics_df):
    """Draw *metrics_df* as a matplotlib table and return the PNG bytes.

    The image is rendered into memory rather than a fixed file on disk, so
    concurrent sessions cannot overwrite each other's report, and the figure
    is closed afterwards so figures do not accumulate across reruns.
    """
    buffer = io.BytesIO()
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        ax.axis('tight')
        ax.axis('off')
        table = ax.table(
            cellText=metrics_df.values,
            colLabels=metrics_df.columns.tolist(),
            loc='center',
            cellLoc='center',
            colLoc='center',
        )
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1.2, 1.2)  # Adjust the scale for better appearance
        fig.savefig(buffer, format='png', bbox_inches='tight', dpi=300)
    finally:
        plt.close(fig)
    return buffer.getvalue()


def main():
    """Render the app: upload, configure, train, show metrics, offer exports."""
    # File uploader
    st.title("Model Training with Metrics")
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return

    # Show the dataset
    st.write("Dataset:")
    st.dataframe(df)

    # Model Training Section
    st.subheader("Model Training")
    if df.empty:
        st.warning("The dataset is empty. Please upload a valid CSV file.")
        return

    target = st.selectbox("Select Target Variable", df.columns)
    features = [col for col in df.columns if col != target]

    # The estimators cannot handle missing values, so train on complete rows only.
    data = df.dropna()
    dropped_rows = len(df) - len(data)
    if dropped_rows:
        st.info(f"Ignoring {dropped_rows} row(s) with missing values.")

    # A split needs at least one feature column and two rows.
    if not features or len(data) < 2:
        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
        return

    X = _encode_features(data[features])
    y = data[target]

    if _is_continuous_target(y):
        st.warning(
            "The selected target looks continuous (non-integer numeric values). "
            "These models are classifiers; please choose a categorical target."
        )
        return

    # Split the data into training and test sets with customizable training size
    train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_size, random_state=42
        )
    except ValueError as exc:
        # Raised when the chosen ratio leaves one side of the split empty.
        st.warning(f"Could not split the data with this training size: {exc}")
        return

    # List of classifiers to evaluate. The stochastic models are seeded like
    # the split above so the report is identical across Streamlit reruns
    # (every widget interaction, including a download click, reruns the script).
    classifiers = {
        'Logistic Regression': LogisticRegression(
            max_iter=5000, solver='saga', penalty='l1', random_state=42
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(random_state=42),
        'Support Vector Machine (SVM)': SVC(),
        'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
        'Naive Bayes': GaussianNB(),
    }

    # Train and evaluate each model
    metrics = _evaluate_classifiers(classifiers, X_train, X_test, y_train, y_test)
    if not metrics:
        st.error("None of the models could be trained on this dataset.")
        return

    # Create a metrics DataFrame
    metrics_df = pd.DataFrame(metrics)

    # Display results in a table using st.dataframe
    st.subheader("Model Performance Metrics")
    st.dataframe(metrics_df)

    # Download options
    st.subheader("Download Model Performance Report in Different Formats")

    # CSV
    st.download_button(
        label="Download as CSV",
        data=metrics_df.to_csv(index=False),
        file_name="model_report.csv",
        mime="text/csv",
    )

    # Excel
    st.download_button(
        label="Download as Excel",
        data=_metrics_to_excel_bytes(metrics_df),
        file_name="model_report.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    # JSON
    st.download_button(
        label="Download as JSON",
        data=metrics_df.to_json(orient='records'),
        file_name="model_report.json",
        mime="application/json",
    )

    # PDF (using `fpdf` library)
    st.download_button(
        label="Download as PDF",
        data=generate_pdf(metrics_df),
        file_name="model_report.pdf",
        mime="application/pdf",
    )

    # Option to download the dataset
    st.download_button(
        label="Download Dataset",
        data=df.to_csv(index=False),
        file_name="dataset.csv",
        mime="text/csv",
    )

    # Generate and download PNG report
    st.subheader("Download Report as PNG")
    st.download_button(
        label="Download as PNG",
        data=_metrics_table_png(metrics_df),
        file_name="model_report.png",
        mime="image/png",
    )


# Streamlit executes the script top to bottom on every rerun, so the app is
# rendered unconditionally here.
main()