Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Community

saherPervaiz committed on Jan 12, 2025

Commit

1b7c47b

verified ·

1 Parent(s): 8ab0d02

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -55

app.py CHANGED Viewed

@@ -6,13 +6,14 @@ import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import LabelEncoder, StandardScaler
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
-from sklearn.svm import SVC, SVR
-from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.naive_bayes import GaussianNB
-from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
 from scipy import stats
 # File uploader
@@ -97,64 +98,61 @@ if uploaded_file is not None:
             train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
-            # Store results in a dictionary
-            results = []
-            # Model Selection and Evaluation
             if is_classification:
-                model_choices = [
-                    ("Random Forest", RandomForestClassifier(n_estimators=50)),
-                    ("Logistic Regression", LogisticRegression(max_iter=1000)),
-                    ("SVM", SVC()),
-                    ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
-                    ("Decision Tree", DecisionTreeClassifier()),
-                    ("Naive Bayes", GaussianNB())
-                ]
-                for name, model in model_choices:
-                    model.fit(X_train, y_train)
-                    y_pred = model.predict(X_test)
-                    # Accuracy and Classification Report
                     accuracy = accuracy_score(y_test, y_pred)
-                    classification_report_output = classification_report(y_test, y_pred)
-                    # Append results
-                    results.append([name, accuracy, classification_report_output])
-            else:  # Regression models
-                model_choices = [
-                    ("Random Forest", RandomForestRegressor(n_estimators=50)),
-                    ("Linear Regression", LinearRegression()),
-                    ("SVR", SVR()),
-                    ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
-                    ("Decision Tree", DecisionTreeRegressor()),
-                    ("Ridge Regression", Ridge())
-                ]
-                for name, model in model_choices:
-                    model.fit(X_train, y_train)
-                    y_pred = model.predict(X_test)
-                    # Mean Squared Error (MSE) for regression tasks
-                    mse = mean_squared_error(y_test, y_pred)
-                    # Append results
-                    results.append([name, None, mse])
-            # Display results in a table
-            st.subheader("Model Performance Results")
-            results_df = pd.DataFrame(results, columns=["Model", "Accuracy" if is_classification else "Accuracy (N/A)",
-                                                        "Classification Report" if is_classification else "MSE (N/A)"])
-            # Bold the headers
-            st.markdown(f"**Model Performance Results**")
-            st.dataframe(results_df)
             # Option to download the model performance metrics (Results Table)
             st.download_button(
                 label="Download Model Report",
-                data=results_df.to_csv(index=False),
                 file_name="model_report.csv",
                 mime="text/csv"
             )
@@ -166,7 +164,7 @@ if uploaded_file is not None:
                 file_name="cleaned_dataset.csv",
                 mime="text/csv"
             )
             # Download correlation heatmap
             st.subheader("Correlation Heatmap")
             correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()

 from sklearn.model_selection import train_test_split
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
 from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from tabulate import tabulate
 from scipy import stats
 # File uploader
             train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
+            # Initialize results storage
+            predictions = pd.DataFrame()
+            metrics = []
+            # Evaluate classifiers (if classification)
             if is_classification:
+                classifiers = {
+                    'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
+                    'Decision Tree': DecisionTreeClassifier(),
+                    'Random Forest': RandomForestClassifier(),
+                    'Support Vector Machine (SVM)': SVC(),
+                    'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
+                    'Naive Bayes': GaussianNB()
+                }
+                for name, classifier in classifiers.items():
+                    classifier.fit(X_train, y_train)
+                    y_pred = classifier.predict(X_test)
+                    predictions[name] = y_pred  # Store predictions
+                    # Evaluate metrics
                     accuracy = accuracy_score(y_test, y_pred)
+                    precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
+                    recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
+                    f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
+                    metrics.append({
+                        'Model': name,
+                        'Accuracy': round(accuracy, 2),
+                        'Precision': round(precision, 2),
+                        'Recall': round(recall, 2),
+                        'F1-Score': round(f1, 2)
+                    })
+            # Create a metrics DataFrame
+            metrics_df = pd.DataFrame(metrics)
+            # Format table with tabulate
+            table = tabulate(
+                metrics_df,
+                headers="keys",
+                tablefmt="fancy_grid",
+                showindex=False,
+                numalign="center",
+                stralign="center"
+            )
+            # Display formatted table
+            st.markdown(f"**Model Performance Metrics**")
+            st.text(table)
             # Option to download the model performance metrics (Results Table)
             st.download_button(
                 label="Download Model Report",
+                data=metrics_df.to_csv(index=False),
                 file_name="model_report.csv",
                 mime="text/csv"
             )
                 file_name="cleaned_dataset.csv",
                 mime="text/csv"
             )
             # Download correlation heatmap
             st.subheader("Correlation Heatmap")
             correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()