Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Files Community

saherPervaiz committed on Jan 12, 2025

Commit

85d20ed

verified ·

1 Parent(s): 97a2e91

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -100

app.py CHANGED Viewed

@@ -1,107 +1,63 @@
-import streamlit as st
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder, StandardScaler
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.linear_model import LogisticRegression, LinearRegression
-from sklearn.svm import SVC, SVR
-from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from sklearn.naive_bayes import GaussianNB
-from sklearn.metrics import accuracy_score, mean_squared_error
-# Page title and CSV uploader; everything below runs only once a file has been uploaded
-st.title("Model Training")
-uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
-if uploaded_file is not None:
-    df = pd.read_csv(uploaded_file)
-    # Show the uploaded dataset as an interactive table
-    st.write("Dataset:")
-    st.dataframe(df)
-    # Model Training Section
-    st.subheader("Model Training")
-    if df.empty:
-        st.warning("The dataset is empty. Please upload a valid CSV file.")
-    else:
-        target = st.selectbox("Select Target Variable", df.columns)
-        features = [col for col in df.columns if col != target]  # every column other than the target is a feature
-        X = df[features]
-        y = df[target]
-        # Decide between classification and regression from the target column alone
-        is_classification = y.dtype == 'object' or len(y.unique()) <= 10  # object dtype or at most 10 distinct values => classification
-        # Guard against zero rows before the split. NOTE(review): likely redundant with the df.empty check above - confirm
-        if len(X) == 0 or len(y) == 0:
-            st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
-        else:
-            # Split into train/test sets; the slider picks the training fraction and the test fraction is its complement
-            train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
-            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
-            # Collect one [model name, score] row per model (a list of rows, not a dictionary)
-            results = []
-            # Classification: fit each candidate model and record its accuracy on the test split
-            if is_classification:
-                model_choices = [
-                    ("Random Forest", RandomForestClassifier(n_estimators=50)),
-                    ("Logistic Regression", LogisticRegression(max_iter=1000)),
-                    ("SVM", SVC()),
-                    ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
-                    ("Decision Tree", DecisionTreeClassifier()),
-                    ("Naive Bayes", GaussianNB())
-                ]
-                for name, model in model_choices:
-                    model.fit(X_train, y_train)  # NOTE(review): X is used as loaded; no encoding or scaling step is visible in this script
-                    y_pred = model.predict(X_test)
-                    accuracy = accuracy_score(y_test, y_pred)
-                    results.append([name, accuracy])
-                # Display results in a table
-                st.subheader("Model Performance Results")
-                results_df = pd.DataFrame(results, columns=["Model", "Accuracy"])
-                st.markdown(f"**Model Performance Results**")  # NOTE(review): repeats the subheader text above; the f-prefix has no placeholders
-                st.dataframe(results_df)
-            # Regression: fit each candidate model and record its mean squared error on the test split
-            else:
-                model_choices = [
-                    ("Random Forest", RandomForestRegressor(n_estimators=50)),
-                    ("Linear Regression", LinearRegression()),
-                    ("SVR", SVR()),
-                    ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
-                    ("Decision Tree", DecisionTreeRegressor()),
-                ]
-                for name, model in model_choices:
-                    model.fit(X_train, y_train)  # NOTE(review): X is used as loaded; no encoding or scaling step is visible in this script
-                    y_pred = model.predict(X_test)
-                    mse = mean_squared_error(y_test, y_pred)
-                    results.append([name, mse])
-                # Display results in a table
-                st.subheader("Model Performance Results")
-                results_df = pd.DataFrame(results, columns=["Model", "Mean Squared Error"])
-                st.markdown(f"**Model Performance Results**")  # NOTE(review): repeats the subheader text above; the f-prefix has no placeholders
-                st.dataframe(results_df)
-            # Download the results table built by whichever branch ran above, serialized as CSV without the index
-            st.download_button(
-                label="Download Model Report",
-                data=results_df.to_csv(index=False),
-                file_name="model_report.csv",
-                mime="text/csv"
-            )
-            # Download the uploaded dataset itself, re-serialized as CSV without the index
-            st.download_button(
-                label="Download Dataset",
-                data=df.to_csv(index=False),
-                file_name="dataset.csv",
-                mime="text/csv"
-            )

+# Train several scikit-learn classifiers on one train/test split and print a
+# table of accuracy, macro precision, macro recall and macro F1 per model.
+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from tabulate import tabulate
+
+# NOTE(review): X (features) and y (labels) are not defined anywhere in this
+# script; they must be supplied before this point or the split below raises
+# NameError.
+# Split the data into training and testing sets (25% held out, fixed seed)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+# Classifiers to evaluate, keyed by the display name used in the report
+classifiers = {
+    'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
+    'Decision Tree': DecisionTreeClassifier(),
+    'Random Forest': RandomForestClassifier(),
+    'Support Vector Machine (SVM)': SVC(),
+    'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
+    'Naive Bayes': GaussianNB()
+}
+# predictions: one column of test-set predictions per model
+# metrics: one dict of rounded scores per model
+predictions = pd.DataFrame()
+metrics = []
+# Train and evaluate each model
+for name, classifier in classifiers.items():
+    classifier.fit(X_train, y_train)
+    y_pred = classifier.predict(X_test)
+    predictions[name] = y_pred  # Store predictions
+    # Macro averaging weights every class equally; zero_division=1 scores an
+    # undefined precision/recall/F1 (zero denominator) as 1 instead of warning
+    accuracy = accuracy_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
+    recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
+    f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
+    metrics.append({
+        'Model': name,
+        'Accuracy': round(accuracy, 2),
+        'Precision': round(precision, 2),
+        'Recall': round(recall, 2),
+        'F1-Score': round(f1, 2)
+    })
+# Create a metrics DataFrame
+metrics_df = pd.DataFrame(metrics)
+# Wrap each header in ANSI bold on/off escape codes
+bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
+# Format table with tabulate
+table = tabulate(
+    metrics_df,
+    headers=bold_headers,
+    tablefmt="fancy_grid",
+    showindex=False,
+    numalign="center",
+    stralign="center"
+)
+# Bold title centered in 80 columns
+print(f"\033[1m{'Model Performance Metrics'.center(80)}\033[0m")
+# The table is printed as-is: str.center() returns its input unchanged when the
+# string is already longer than the requested width, which this multi-line
+# table is, so the former table.center(120) call had no effect on the output.
+print(table)
+print("\n" + "=" * 80)