Spaces:

trohith89
/

Electronics-Sales-Classification

Sleeping

App Files Files Community

trohith89 committed on Jan 26, 2025

Commit

7b29dae

verified ·

1 Parent(s): 74238f0

Update pages/4_Model_Creation_and_Evaluation.py

Browse files

Files changed (1) hide show

pages/4_Model_Creation_and_Evaluation.py +127 -21

pages/4_Model_Creation_and_Evaluation.py CHANGED Viewed

@@ -104,15 +104,33 @@ if df is not None:
     # Define the objective function for Optuna
     st.code("""
     def objective(trial):
-        # Choose algorithm
-        algo = trial.suggest_categorical("algo", ["lor", "svc"])
         if algo == "svc":
-            # SVC hyperparameters
             c = trial.suggest_float("C", 0.001, 1000, log=True)
             kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
             if kernel == 'poly':
                 degree = trial.suggest_int("degree", 1, 3)
                 model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
@@ -122,7 +140,7 @@ if df is not None:
             else:
                 model = SVC(C=c, kernel=kernel, random_state=42)
         else:
-            # Logistic Regression hyperparameters
             solver, penalty = trial.suggest_categorical(
                 "choices", [
                     ("lbfgs", "l2"), ("newton-cg", "l2"),
@@ -132,22 +150,62 @@ if df is not None:
             )
             reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
             l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
             if penalty == "elasticnet":
-                model = LogisticRegression(
-                    solver=solver, penalty=penalty, C=reg_strength,
-                    l1_ratio=l1_ratio, random_state=42
-                )
             else:
-                model = LogisticRegression(
-                    solver=solver, penalty=penalty, C=reg_strength, random_state=42
-                )
-        # Perform cross-validation and return the mean score
-        score = cross_val_score(model, x_train_std, y_train, cv=5, scoring="accuracy").mean()
-        return score
     """, language="python")
     # Create and optimize the study
     st.code("""
     study = optuna.create_study(direction="maximize")
@@ -158,7 +216,7 @@ if df is not None:
     # Create the best model
     st.markdown("## Create the Best Model")
-    model = SVC(kernel='rbf', gamma='scale', C=53.123097332514455)
     st.write(model)
     # Train the model
@@ -168,9 +226,57 @@ if df is not None:
     # Model Evaluation
     st.markdown("# Model Evaluation")
     y_pred = model.predict(x_test_std)
-    st.write("Accuracy:", accuracy_score(y_test, y_pred))
-    st.write("Classification Report:\n", classification_report(y_test, y_pred))
-    st.write("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
 else:
     st.warning("No Dataset Found")

     # Define the objective function for Optuna
     st.code("""
+    import numpy as np
+    import optuna
+    from sklearn.svm import SVC
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import cross_validate
+    from sklearn.preprocessing import StandardScaler
+    # Check for NaN or infinite values in the data
+    assert not np.any(np.isnan(x_train_std)), "Input data contains NaN values"
+    assert not np.any(np.isnan(y_train)), "Target data contains NaN values"
+    assert not np.any(np.isinf(x_train_std)), "Input data contains infinite values"
+    # Global lists to store training and validation scores for each trial
+    training_scores = []
+    validation_scores = []
     def objective(trial):
+        # Log trial parameters for debugging
+        print(f"Trial params: {trial.params}")
+        algo = trial.suggest_categorical("algo", ["lor", "svc"])
         if algo == "svc":
+            # Hyperparameters for SVC
             c = trial.suggest_float("C", 0.001, 1000, log=True)
             kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
             if kernel == 'poly':
                 degree = trial.suggest_int("degree", 1, 3)
                 model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
             else:
                 model = SVC(C=c, kernel=kernel, random_state=42)
         else:
+            # Hyperparameters for Logistic Regression
             solver, penalty = trial.suggest_categorical(
                 "choices", [
                     ("lbfgs", "l2"), ("newton-cg", "l2"),
             )
             reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
             l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
             if penalty == "elasticnet":
+                model = LogisticRegression(solver=solver, penalty=penalty, C=reg_strength, l1_ratio=l1_ratio, random_state=42)
             else:
+                model = LogisticRegression(solver=solver, penalty=penalty, C=reg_strength, random_state=42)
+        # Cross-validation scoring with training and validation
+        try:
+            scores = cross_validate(
+                model, x_train_std, y_train, cv=5,
+                scoring="accuracy", return_train_score=True
+            )
+            train_score = scores["train_score"].mean()
+            val_score = scores["test_score"].mean()
+            # Append scores to global lists
+            training_scores.append(train_score)
+            validation_scores.append(val_score)
+        except ValueError as e:
+            print(f"Error during cross-validation: {e}")
+            train_score, val_score = float("-inf"), float("-inf")
+        return val_score
+    # Running the optimization
+    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
+    study.optimize(objective, n_trials=100)
+    # Plotting training vs. validation scores
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=(10, 6))
+    plt.plot(training_scores, label="Training Score", marker="o")
+    plt.plot(validation_scores, label="Validation Score", marker="x")
+    plt.xlabel("Trial")
+    plt.ylabel("Accuracy")
+    plt.title("Training vs. Validation Scores Across Trials")
+    plt.legend()
+    plt.grid()
+    plt.show()
+    # Display best trial
+    print("Best Parameters:")
+    print(study.best_params)
     """, language="python")
+    st.markdown(
+        """
+        <div style="text-align: center;">
+            <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FqUoV8hSyCWU3WocaqqGc.png" width="70%" />
+        </div>
+        """,
+        unsafe_allow_html=True
+    )
     # Create and optimize the study
     st.code("""
     study = optuna.create_study(direction="maximize")
     # Create the best model
     st.markdown("## Create the Best Model")
+    model = SVC(kernel='poly', gamma = 'scale', C = 974.1963187644974, degree = 2)
     st.write(model)
     # Train the model
     # Model Evaluation
     st.markdown("# Model Evaluation")
     y_pred = model.predict(x_test_std)
+    # Evaluation metrics
+    print("Accuracy:", accuracy_score(y_test, y_pred))
+    print("Classification Report:\n", classification_report(y_test, y_pred))
+    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+    import streamlit as st
+    import pandas as pd
+    import seaborn as sns
+    import matplotlib.pyplot as plt
+    from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+    # Example: Replace this with your actual test data and predictions
+    y_pred = model.predict(x_test_std)
+    # Calculate evaluation metrics
+    conf_matrix = confusion_matrix(y_test, y_pred)
+    class_report = classification_report(y_test, y_pred, output_dict=True)  # Output as a dictionary
+    # Convert the classification report to a DataFrame
+    class_report_df = pd.DataFrame(class_report).iloc[:-1, :-1]  # Drop the 'support' row and the last ('weighted avg') column; the 'accuracy' column is kept
+    # Streamlit app
+    st.title("Model Evaluation: Confusion Matrix and Classification Report")
+    # Plotting with Matplotlib and Seaborn
+    fig, axs = plt.subplots(1, 2, figsize=(16, 6))
+    # Confusion Matrix Heatmap
+    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, ax=axs[0], annot_kws={"size": 14})
+    axs[0].set_title("Confusion Matrix", fontsize=16)
+    axs[0].set_xlabel("Predicted Labels", fontsize=14)
+    axs[0].set_ylabel("True Labels", fontsize=14)
+    # Classification Report Heatmap
+    sns.heatmap(class_report_df, annot=True, fmt=".2f", cmap="YlGnBu", cbar=False, ax=axs[1], annot_kws={"size": 12})
+    axs[1].set_title("Classification Report", fontsize=16)
+    axs[1].set_xlabel("Metrics", fontsize=14)
+    axs[1].set_ylabel("Classes", fontsize=14)
+    # Adjust layout
+    plt.tight_layout()
+    # Display the plots in Streamlit
+    st.pyplot(fig)
+    # Display additional metrics (optional)
+    accuracy = accuracy_score(y_test, y_pred)
+    st.write(f"**Accuracy:** {accuracy:.2f}")
 else:
     st.warning("No Dataset Found")