Spaces:

Mpavan45
/

Hotel_Data_Analysis

Sleeping

App Files Files Community

Mpavan45 committed on Jan 23, 2025

Commit

5bf15d7

verified ·

1 Parent(s): a6e190e

Update pages/4_Model Creation and Evaluation.py

Browse files

Files changed (1) hide show

pages/4_Model Creation and Evaluation.py +159 -84

pages/4_Model Creation and Evaluation.py CHANGED Viewed

@@ -71,96 +71,171 @@ st.write("Model training and selection is a crucial phase in machine learning. A
 st.subheader("Data Splitting")
 st.write("The dataset is divided into training and testing sets. The training set is used to train the model, while the testing set is used to evaluate its performance on unseen data.")
-# Ensure the required data is in session_state
 if 'X_res' in st.session_state and 'y_res' in st.session_state:
-    # Retrieve data from session state
     X_res = st.session_state['X_res']
     y_res = st.session_state['y_res']
-    # Perform train-test split
     x_train, x_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
-    # Optional: Store train-test split data back into session state
-    st.session_state['X_train'] = x_train
-    st.session_state['X_test'] = x_test
-    st.session_state['y_train'] = y_train
-    st.session_state['y_test'] = y_test
-    st.write("Train-test split completed!")
-    # Debugging: Check data shapes
-    st.write(f"x_train shape: {x_train.shape}, x_test shape: {x_test.shape}")
-    st.write(f"y_train shape: {y_train.shape}, y_test shape: {y_test.shape}")
-    # Data Preprocessing
-    st.subheader("Data Preprocessing")
-    st.write("""
-    Before training the model, the data may need to be preprocessed. This includes scaling features using techniques like:
-    - **StandardScaler**: Standardizes features by removing the mean and scaling to unit variance.
-    - **MinMaxScaler**: Scales features to a specific range, typically between 0 and 1.
-    """)
-    # Data Scaling
-    scaler = StandardScaler()
-    x_train_std = scaler.fit_transform(x_train)
-    x_test_std = scaler.transform(x_test)
-    # Hyperparameter Tuning with Optuna
-    st.subheader("Hyperparameter Tuning with Optuna")
-    st.write("""
-    Optuna is an automatic hyperparameter optimization framework that allows us to efficiently search for the best hyperparameters for our models. It uses a technique called Bayesian Optimization to find the optimal set of hyperparameters that maximize the model's performance.
-    """)
-    # Optuna Objective Function
-    def objective(trial):
-        # Model Selection
-        algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
-        if algo == "KNN":
-            n = trial.suggest_int("n_neighbors", 1, 50)
-            p = trial.suggest_int("distance", 1, 2)
-            model = KNeighborsClassifier(n_neighbors=n, p=p)
-        else:
-            solver, penalty = trial.suggest_categorical("choices", [("lbfgs", "l2"), ("newton-cg", "l2"), ("sag", "l2"), ("saga", "l1"), ("saga", "l2"), ("saga", "elasticnet")])
-            C = trial.suggest_uniform("lambda", 0.01, 1000)
-            if penalty == "elasticnet":
-                model = LogisticRegression(C=C, solver=solver, penalty=penalty, multi_class="multinomial", l1_ratio=0.3)
-            else:
-                model = LogisticRegression(C=C, solver=solver, penalty=penalty, multi_class="multinomial")
-        return cross_val_score(model, x_train_std, y_train, cv=5, scoring="neg_log_loss").mean()
-    # Optuna Study
-    study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
-    study.optimize(objective, n_trials=200)
-    # Display Results
-    st.write("### Optuna Results")
-    st.write("Best Parameters:", study.best_params)
-    st.write(study.trials_dataframe())
-    # Best Parameters
-    solver = study.best_params.get('solver', 'lbfgs')
-    penalty = study.best_params.get('penalty', 'l2')
-    C = study.best_params.get('lambda', 1.0)
-    # Model Training with Best Parameters
-    model = LogisticRegression(C=C, solver=solver, penalty=penalty, multi_class="multinomial", max_iter=500)
-    model.fit(x_train_std, y_train)
-    # Evaluation Metrics
-    y_pred_probs = model.predict_proba(x_test_std)
-    loss = log_loss(y_test, y_pred_probs)
-    y_pred = np.argmax(y_pred_probs, axis=1)
-    # Confusion Matrix
-    cm = confusion_matrix(y_test, y_pred)
-    st.write("### Model Evaluation Results")
-    st.write(f"Log-Loss Score: {loss}")
-    st.write("Confusion Matrix:")
-    st.write(cm)
-else:
-    st.error("Training and testing data are not available. Please run the previous steps first.")

 st.subheader("Data Splitting")
 st.write("The dataset is divided into training and testing sets. The training set is used to train the model, while the testing set is used to evaluate its performance on unseen data.")
+# Page title, rendered as centered purple HTML (which is why unsafe_allow_html=True is passed)
+st.markdown("<h1 style='text-align:center; color:purple;'>Model Creation and Evaluation</h1>", unsafe_allow_html=True)
+# Step 1 of 5 - data splitting: the code listing is shown first, the live output follows it
+st.subheader("Step 1: Data Splitting")
+# Display-only listing: this string is rendered with st.code and is never executed
+code_1 = """
+# Data Splitting
+x_train, x_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
+"""
+st.code(code_1, language='python')
+# Output for Data Splitting: run the split for real on the resampled data
+# expected under the 'X_res' / 'y_res' keys of st.session_state.
 if 'X_res' in st.session_state and 'y_res' in st.session_state:
     X_res = st.session_state['X_res']
     y_res = st.session_state['y_res']
     x_train, x_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
+    st.write(f"x_train shape: {x_train.shape}")
+    st.write(f"x_test shape: {x_test.shape}")
+    st.write(f"y_train shape: {y_train.shape}")
+    st.write(f"y_test shape: {y_test.shape}")
+else:
+    st.error("Training and testing data are not available. Please run the previous steps first.")
+    # Every later step reads x_train/x_test/y_train/y_test. Halt this script
+    # run here so the user sees the message above instead of a NameError
+    # traceback from the scaling step.
+    st.stop()
+# Step 2 of 5 - feature scaling: the code listing is shown first, the live output follows it
+st.subheader("Step 2: Data Scaling")
+# Display-only listing: this string is rendered with st.code and is never executed
+code_2 = """
+# Data Scaling
+scaler = StandardScaler()
+x_train_std = scaler.fit_transform(x_train)
+x_test_std = scaler.transform(x_test)
+"""
+st.code(code_2, language='python')
+# Live run: the scaler is fitted on the training split only and then reused on the test split. NOTE(review): x_train/x_test are assigned only inside the session-state `if` above - confirm this line cannot be reached without them.
+scaler = StandardScaler()
+x_train_std = scaler.fit_transform(x_train)
+x_test_std = scaler.transform(x_test)
+st.write(f"Scaled x_train_std shape: {x_train_std.shape}")
+st.write(f"Scaled x_test_std shape: {x_test_std.shape}")
+# Code and Output 3: Optuna Optimization
+st.subheader("Step 3: Hyperparameter Optimization with Optuna")
+# Code for Optuna (display-only listing; rendering it with st.code does not
+# define anything, so the objective is defined again as real code below)
+code_3 = """
+# Optuna Objective Function
+def objective(trial):
+    algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
+    if algo == "KNN":
+        n = trial.suggest_int("n_neighbors", 1, 50)
+        p = trial.suggest_int("distance", 1, 2)
+        model = KNeighborsClassifier(n_neighbors=n, p=p)
+    else:
+        solver, penalty = trial.suggest_categorical("choices", [("lbfgs", "l2"), ("newton-cg", "l2")])
+        C = trial.suggest_uniform("lambda", 0.01, 1000)
+        model = LogisticRegression(C=C, solver=solver, penalty=penalty)
+    return cross_val_score(model, x_train_std, y_train, cv=5, scoring="neg_log_loss").mean()
+study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
+study.optimize(objective, n_trials=200)
+"""
+st.code(code_3, language='python')
+# Output for Optuna Optimization
+def objective(trial):
+    """Return the mean 5-fold negative log-loss of the model sampled for this trial.
+
+    Mirrors the listing displayed above so the page shows what actually ran.
+    The study maximizes this value, i.e. it minimizes log-loss.
+    """
+    algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
+    if algo == "KNN":
+        n = trial.suggest_int("n_neighbors", 1, 50)
+        p = trial.suggest_int("distance", 1, 2)
+        model = KNeighborsClassifier(n_neighbors=n, p=p)
+    else:
+        solver, penalty = trial.suggest_categorical("choices", [("lbfgs", "l2"), ("newton-cg", "l2")])
+        C = trial.suggest_uniform("lambda", 0.01, 1000)
+        model = LogisticRegression(C=C, solver=solver, penalty=penalty)
+    return cross_val_score(model, x_train_std, y_train, cv=5, scoring="neg_log_loss").mean()
+study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
+study.optimize(objective, n_trials=200)
+st.write("Best Parameters found by Optuna:", study.best_params)
+st.write("All Trials Dataframe:")
+st.write(study.trials_dataframe())
+# Code and Output 4: Model Training with Best Parameters
+st.subheader("Step 4: Model Training with Best Parameters")
+# Code for Model Training (display-only listing, kept in sync with the live code below)
+code_4 = """
+# Model Training with Best Parameters
+best = study.best_params
+solver, penalty = best.get('choices', (best.get('solver', 'lbfgs'), best.get('penalty', 'l2')))
+C = best.get('lambda', 1.0)
+model = LogisticRegression(C=C, solver=solver, penalty=penalty, max_iter=500)
+model.fit(x_train_std, y_train)
+"""
+st.code(code_4, language='python')
+# Output for Model Training
+# The study samples the (solver, penalty) pair under the single name "choices"
+# and the regularization strength under "lambda"; it never stores 'solver' or
+# 'penalty' keys, so looking those up alone always fell back to the defaults
+# and discarded the tuned pair. The old keys remain as a fallback.
+# NOTE(review): when the best trial is a KNN trial neither "choices" nor
+# "lambda" is present, so a default LogisticRegression is trained here.
+best = study.best_params
+solver, penalty = best.get('choices', (best.get('solver', 'lbfgs'), best.get('penalty', 'l2')))
+C = best.get('lambda', 1.0)
+model = LogisticRegression(C=C, solver=solver, penalty=penalty, max_iter=500)
+model.fit(x_train_std, y_train)
+st.write("Model has been trained successfully!")
+# Code and Output 5: Model Evaluation
+st.subheader("Step 5: Model Evaluation")
+# Code for Model Evaluation (display-only listing, kept in sync with the live code below)
+code_5 = """
+# Model Evaluation
+y_pred_probs = model.predict_proba(x_test_std)
+loss = log_loss(y_test, y_pred_probs, labels=model.classes_)
+y_pred = model.classes_[np.argmax(y_pred_probs, axis=1)]
+cm = confusion_matrix(y_test, y_pred)
+"""
+st.code(code_5, language='python')
+# Output for Model Evaluation
+y_pred_probs = model.predict_proba(x_test_std)
+# The probability columns follow model.classes_. Passing that ordering to
+# log_loss keeps the score well-defined even if the test split happens to
+# miss a class.
+loss = log_loss(y_test, y_pred_probs, labels=model.classes_)
+# argmax yields column positions; map them back to the real class labels so
+# the confusion matrix compares labels with labels, not labels with indices.
+y_pred = model.classes_[np.argmax(y_pred_probs, axis=1)]
+cm = confusion_matrix(y_test, y_pred)
+st.write(f"Log-Loss Score: {loss}")
+st.write("Confusion Matrix:")
+st.write(cm)
+import streamlit as st
+# Inject the CSS used by the navigation links below: green pill-style buttons with a hover effect, laid out in a flex row
+st.markdown(
+    """
+    <style>
+    .custom-button {
+        display: inline-block;
+        padding: 5px 10px;
+        font-size: 14px;
+        color: #ffffff;
+        background-color: #4CAF50;
+        border: none;
+        border-radius: 5px;
+        text-align: center;
+        text-decoration: none;
+        transition: background-color 0.3s ease, transform 0.2s ease;
+        cursor: pointer;
+    }
+    .custom-button:hover {
+        background-color: #45a049;
+        transform: scale(1.05);
+    }
+    .button-container {
+        display: flex;
+        justify-content: space-between;
+        margin-top: 20px;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+# Previous/Next links rendered as raw HTML anchors. NOTE(review): the hrefs are relative "pages/..." paths - confirm they match the URLs Streamlit actually serves for the neighbouring pages.
+st.markdown(
+    """
+    <div class="button-container">
+        <a href="pages/3_EDA_and_Feature_Engineering" target="_self" class="custom-button">Previous ⏮️</a>
+        <a href="pages/5_Conclusion" target="_self" class="custom-button">Next ⏭️</a>
+    </div>
+    """,
+    unsafe_allow_html=True,
+)