Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Community

saherPervaiz committed on Jan 12, 2025

Commit

35ca176

verified ·

1 Parent(s): 85d20ed

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -59

app.py CHANGED Viewed

@@ -1,63 +1,116 @@
-from tabulate import tabulate
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
-# Split the data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
-# List of classifiers to evaluate
-classifiers = {
-    'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
-    'Decision Tree': DecisionTreeClassifier(),
-    'Random Forest': RandomForestClassifier(),
-    'Support Vector Machine (SVM)': SVC(),
-    'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
-    'Naive Bayes': GaussianNB()
-}
-# Initialize results storage
-predictions = pd.DataFrame()
-metrics = []
-# Train and evaluate each model
-for name, classifier in classifiers.items():
-    # Train the model
-    classifier.fit(X_train, y_train)
-    # Make predictions
-    y_pred = classifier.predict(X_test)
-    predictions[name] = y_pred  # Store predictions
-    # Evaluate metrics
-    accuracy = accuracy_score(y_test, y_pred)
-    precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
-    recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
-    f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
-    metrics.append({
-        'Model': name,
-        'Accuracy': round(accuracy, 2),
-        'Precision': round(precision, 2),
-        'Recall': round(recall, 2),
-        'F1-Score': round(f1, 2)
-    })
-# Create a metrics DataFrame
-metrics_df = pd.DataFrame(metrics)
-# Add bold formatting to the headers
-bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
-# Format table with tabulate
-table = tabulate(
-    metrics_df,
-    headers=bold_headers,
-    tablefmt="fancy_grid",
-    showindex=False,
-    numalign="center",
-    stralign="center"
-)
-# Add spacing for a larger table
-print(f"\033[1m{'Model Performance Metrics'.center(80)}\033[0m")  # Bold title
-print(table.center(120))  # Center align the table for larger width
-print("\n" + "=" * 80)

+import streamlit as st
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from tabulate import tabulate
+# File uploader
+st.title("Model Training with Metrics")
+uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    # Show the dataset
+    st.write("Dataset:")
+    st.dataframe(df)
+    # Model Training Section
+    st.subheader("Model Training")
+    if df.empty:
+        st.warning("The dataset is empty. Please upload a valid CSV file.")
+    else:
+        target = st.selectbox("Select Target Variable", df.columns)
+        features = [col for col in df.columns if col != target]
+        X = df[features]
+        y = df[target]
+        # Determine if the target is continuous or categorical
+        is_classification = y.dtype == 'object' or len(y.unique()) <= 10  # If target is categorical or has few unique values, treat as classification
+        # Ensure there is enough data before proceeding with train-test split
+        if len(X) == 0 or len(y) == 0:
+            st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
+        else:
+            # Split the data into training and test sets with customizable training size
+            train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
+            # List of classifiers to evaluate
+            classifiers = {
+                'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
+                'Decision Tree': DecisionTreeClassifier(),
+                'Random Forest': RandomForestClassifier(),
+                'Support Vector Machine (SVM)': SVC(),
+                'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
+                'Naive Bayes': GaussianNB()
+            }
+            # Initialize results storage
+            metrics = []
+            # Train and evaluate each model
+            for name, classifier in classifiers.items():
+                # Train the model
+                classifier.fit(X_train, y_train)
+                # Make predictions
+                y_pred = classifier.predict(X_test)
+                # Evaluate metrics
+                accuracy = accuracy_score(y_test, y_pred)
+                precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
+                recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
+                f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
+                metrics.append({
+                    'Model': name,
+                    'Accuracy': round(accuracy, 2),
+                    'Precision': round(precision, 2),
+                    'Recall': round(recall, 2),
+                    'F1-Score': round(f1, 2)
+                })
+            # Create a metrics DataFrame
+            metrics_df = pd.DataFrame(metrics)
+            # Add bold formatting to the headers for tabulate
+            bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
+            # Format table with tabulate
+            table = tabulate(
+                metrics_df,
+                headers=bold_headers,
+                tablefmt="fancy_grid",
+                showindex=False,
+                numalign="center",
+                stralign="center"
+            )
+            # Display results in Streamlit
+            st.subheader("Model Performance Metrics")
+            st.markdown(f"**Model Performance Metrics**")
+            st.text(table)
+            # Option to download the model performance metrics (Results Table)
+            st.download_button(
+                label="Download Model Report",
+                data=metrics_df.to_csv(index=False),
+                file_name="model_report.csv",
+                mime="text/csv"
+            )
+            # Option to download the dataset
+            st.download_button(
+                label="Download Dataset",
+                data=df.to_csv(index=False),
+                file_name="dataset.csv",
+                mime="text/csv"
+            )