Spaces:

computerscience-person
/

CCS229_Customer_Churn

Sleeping

App Files Files Community

computerscience-person committed on Mar 16, 2025

Commit

6df7a65

1 Parent(s): 4ca68c5

Add predictor portion.

Browse files

Files changed (1) hide show

app.py +85 -25

app.py CHANGED Viewed

@@ -20,7 +20,9 @@ def _():
 @app.cell
 def _(pl):
-    df = pl.read_csv('hf://datasets/louiecerv/customer_churn/customer_churn_data.csv')
     df.describe()
     return (df,)
@@ -33,14 +35,19 @@ def _(df):
 @app.cell
 def _(df, pl):
-    from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler, OrdinalEncoder
     from sklearn.pipeline import make_pipeline
     from sklearn.compose import make_column_transformer
     from sklearn.linear_model import (
         LogisticRegression,
         BayesianRidge,
         RidgeClassifier,
-        SGDClassifier
     )
     from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
     from sklearn.naive_bayes import BernoulliNB
@@ -103,7 +110,10 @@ def _(df, pl):
         preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
     )
     sgd_pipe = make_pipeline(
-        preprocessor, SGDClassifier(loss='hinge', penalty='l2', max_iter=10000, random_state=random_state)
     )
     lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
     bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
@@ -122,7 +132,7 @@ def _(df, pl):
                 ("dtree", dtree),
             ],
             voting="soft",
-            weights=[3, 2],
         ),
     )
     bag_pipe = make_pipeline(preprocessor, bag)
@@ -335,12 +345,12 @@ def _(
     ## Gradient Boosting Classifier
-    - Accuracy: { accuracy_score(y_test, gbc_pred) }
-    - Precision: { precision_score(y_test, gbc_pred) }
-    - Recall: { recall_score(y_test, gbc_pred) }
-    - F1: { f1_score(y_test, gbc_pred) }
-    - ROC-AUC: { roc_auc_score(y_test, gbc_pred) }
-    - Log Loss: { log_loss(y_test, gbc_pred) }
     ## Voting Classifier
@@ -360,10 +370,12 @@ def _(
     - ROC-AUC: {roc_auc_score(y_test, bag_pred)}
     - Log Loss: {log_loss(y_test, bag_pred)}
-    {mo.callout(
-        "From the metrics, the Quadratic Discriminant Analysis and the Decision Tree Classifier perform the best, thus, they were chosen for the Voting Classifier",
-        kind='info'
-    )}
     """)
     return (
         accuracy_score,
@@ -382,17 +394,65 @@ def _(
 @app.cell
 def _(mo):
-    preds = {
-        'tenure': mo.ui.number(start=1, stop=72, step=1),
-        'monthly_charges': mo.ui.number(start=20, stop=120, step=1),
-    }
-    mo.md(f"""
-    Tenure: {preds['tenure']}
-    Monthy Charge: {preds['monthly_charges']}
-    """)
-    return (preds,)
 if __name__ == "__main__":

 @app.cell
 def _(pl):
     # Load the customer churn dataset into a polars DataFrame and show its
     # summary statistics as this cell's output.
     # The underscore-prefixed name stays private to this cell.
     _dataset_url = "hf://datasets/louiecerv/customer_churn/customer_churn_data.csv"
     df = pl.read_csv(_dataset_url)
     # Last bare expression before the return.
     df.describe()
     return (df,)
 @app.cell
 def _(df, pl):
+    from sklearn.preprocessing import (
+        RobustScaler,
+        OneHotEncoder,
+        MinMaxScaler,
+        OrdinalEncoder,
+    )
     from sklearn.pipeline import make_pipeline
     from sklearn.compose import make_column_transformer
     from sklearn.linear_model import (
         LogisticRegression,
         BayesianRidge,
         RidgeClassifier,
+        SGDClassifier,
     )
     from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
     from sklearn.naive_bayes import BernoulliNB
         preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
     )
     sgd_pipe = make_pipeline(
+        preprocessor,
+        SGDClassifier(
+            loss="hinge", penalty="l2", max_iter=10000, random_state=random_state
+        ),
     )
     lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
     bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
                 ("dtree", dtree),
             ],
             voting="soft",
+            weights=[5, 2],
         ),
     )
     bag_pipe = make_pipeline(preprocessor, bag)
     ## Gradient Boosting Classifier
+    - Accuracy: {accuracy_score(y_test, gbc_pred)}
+    - Precision: {precision_score(y_test, gbc_pred)}
+    - Recall: {recall_score(y_test, gbc_pred)}
+    - F1: {f1_score(y_test, gbc_pred)}
+    - ROC-AUC: {roc_auc_score(y_test, gbc_pred)}
+    - Log Loss: {log_loss(y_test, gbc_pred)}
     ## Voting Classifier
     - ROC-AUC: {roc_auc_score(y_test, bag_pred)}
     - Log Loss: {log_loss(y_test, bag_pred)}
+    {
+        mo.callout(
+            "From the metrics, the Quadratic Discriminant Analysis and the Decision Tree Classifier perform the best, thus, they were chosen for the Voting Classifier",
+            kind="info",
+        )
+    }
     """)
     return (
         accuracy_score,
 @app.cell
 def _(mo):
     # Build the input form for the predictor: three numeric fields followed by
     # two dropdowns, bundled into a single mo.ui.dictionary so other cells can
     # look the widgets up by key.
     _numeric_fields = {
         "tenure": mo.ui.number(start=1, stop=72, step=1, label="Tenure"),
         "monthly_charges": mo.ui.number(
             start=20, stop=120, step=1, label="Monthly Charges"
         ),
         "total_charges": mo.ui.number(
             start=20, stop=8000, step=1, label="Total Charges"
         ),
     }
     _choice_fields = {
         "contract": mo.ui.dropdown(
             options=["None", "One", "Two"], label="Contract (Year)"
         ),
         "service": mo.ui.dropdown(
             options=["None", "Basic", "Fiber Optic"], label="Service"
         ),
     }
     # Merge order keeps the numeric widgets above the dropdowns.
     user_inputs = mo.ui.dictionary({**_numeric_fields, **_choice_fields})
     # Stack the individual widgets vertically as the cell output.
     mo.vstack(user_inputs.values())
     return (user_inputs,)
@app.cell
def _(mo, pl, user_inputs, vot_pipe):
    # Predict churn for the values currently entered in `user_inputs` and
    # render the predicted label together with the probability the voting
    # pipeline assigns to it.
    #
    # Each dropdown choice is translated into the string code stored in the
    # "contract_One Two year" / "internet_service_Fiber No" feature columns.
    # NOTE(review): these codes presumably mirror how the training frame was
    # encoded in the preprocessing cell -- confirm against that cell.
    _contract_codes = {
        "None": "false_false",
        "One": "true_false",
        "Two": "false_true",
    }
    _service_codes = {
        "None": "false_false",
        "Basic": "true_false",
        "Fiber Optic": "false_true",
    }
    # .get() yields None for an empty or unrecognised selection, the same
    # result the former `case _: pass` fall-through produced.
    contract = _contract_codes.get(user_inputs["contract"].value)
    service = _service_codes.get(user_inputs["service"].value)

    # Without this guard an unselected dropdown would pass None into the
    # model's categorical columns; halt the cell with a prompt instead.
    mo.stop(
        contract is None or service is None,
        mo.md("Select a contract and a service to get a prediction."),
    )

    # Single-row frame holding the user's feature values.
    preds = pl.DataFrame(
        {
            "tenure": user_inputs["tenure"].value,
            "monthly_charges": user_inputs["monthly_charges"].value,
            "total_charges": user_inputs["total_charges"].value,
            "contract_One Two year": contract,
            "internet_service_Fiber No": service,
        }
    )
    # (predicted labels, per-class probabilities) for that single row.
    prediction = (vot_pipe.predict(preds), vot_pipe.predict_proba(preds))

    # Computed outside the f-string: nesting the same quote character inside
    # an f-string is a SyntaxError on Python versions before 3.12.
    _churns = bool(prediction[0][0])
    _label = "Yes" if _churns else "No"
    # Probability column 1 is read for a truthy prediction, column 0 otherwise.
    _confidence = prediction[1][0][1 if _churns else 0] * 100
    mo.md(f"Prediction: {_label}, with about {_confidence:.2f}% probability.")
    return contract, prediction, preds, service
 if __name__ == "__main__":