Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Files Community

saherPervaiz committed on Jan 12, 2025

Commit

0665d1a

verified ·

1 Parent(s): ff625a6

Create app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import streamlit as st
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from tabulate import tabulate
+# File uploader
+st.title("Model Training with Metrics")
+uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    # Show the dataset
+    st.write("Dataset:")
+    st.dataframe(df)
+    # Model Training Section
+    st.subheader("Model Training")
+    if df.empty:
+        st.warning("The dataset is empty. Please upload a valid CSV file.")
+    else:
+        target = st.selectbox("Select Target Variable", df.columns)
+        features = [col for col in df.columns if col != target]
+        X = df[features]
+        y = df[target]
+        # Determine if the target is continuous or categorical
+        is_classification = y.dtype == 'object' or len(y.unique()) <= 10  # If target is categorical or has few unique values, treat as classification
+        # Ensure there is enough data before proceeding with train-test split
+        if len(X) == 0 or len(y) == 0:
+            st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
+        else:
+            # Split the data into training and test sets with customizable training size
+            train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
+            # List of classifiers to evaluate
+            classifiers = {
+                'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
+                'Decision Tree': DecisionTreeClassifier(),
+                'Random Forest': RandomForestClassifier(),
+                'Support Vector Machine (SVM)': SVC(),
+                'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
+                'Naive Bayes': GaussianNB()
+            }
+            # Initialize results storage
+            metrics = []
+            # Train and evaluate each model
+            for name, classifier in classifiers.items():
+                # Train the model
+                classifier.fit(X_train, y_train)
+                # Make predictions
+                y_pred = classifier.predict(X_test)
+                # Evaluate metrics
+                accuracy = accuracy_score(y_test, y_pred)
+                precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
+                recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
+                f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
+                metrics.append({
+                    'Model': name,
+                    'Accuracy': round(accuracy, 2),
+                    'Precision': round(precision, 2),
+                    'Recall': round(recall, 2),
+                    'F1-Score': round(f1, 2)
+                })
+            # Create a metrics DataFrame
+            metrics_df = pd.DataFrame(metrics)
+            # Add bold formatting to the headers for tabulate
+            bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
+            # Format table with tabulate
+            table = tabulate(
+                metrics_df,
+                headers=bold_headers,
+                tablefmt="fancy_grid",
+                showindex=False,
+                numalign="center",
+                stralign="center"
+            )
+            # Display results in Streamlit
+            st.subheader("Model Performance Metrics")
+            st.markdown(f"**Model Performance Metrics**")
+            st.text(table)
+            # Option to download the model performance metrics (Results Table)
+            st.download_button(
+                label="Download Model Report",
+                data=metrics_df.to_csv(index=False),
+                file_name="model_report.csv",
+                mime="text/csv"
+            )
+            # Option to download the dataset
+            st.download_button(
+                label="Download Dataset",
+                data=df.to_csv(index=False),
+                file_name="dataset.csv",
+                mime="text/csv"
+            )