Commit
·
6df7a65
1
Parent(s):
4ca68c5
Add predictor portion.
Browse files
app.py
CHANGED
|
@@ -20,7 +20,9 @@ def _():
|
|
| 20 |
|
| 21 |
@app.cell
|
| 22 |
def _(pl):
|
| 23 |
-
df = pl.read_csv(
|
|
|
|
|
|
|
| 24 |
df.describe()
|
| 25 |
return (df,)
|
| 26 |
|
|
@@ -33,14 +35,19 @@ def _(df):
|
|
| 33 |
|
| 34 |
@app.cell
|
| 35 |
def _(df, pl):
|
| 36 |
-
from sklearn.preprocessing import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
from sklearn.pipeline import make_pipeline
|
| 38 |
from sklearn.compose import make_column_transformer
|
| 39 |
from sklearn.linear_model import (
|
| 40 |
LogisticRegression,
|
| 41 |
BayesianRidge,
|
| 42 |
RidgeClassifier,
|
| 43 |
-
SGDClassifier
|
| 44 |
)
|
| 45 |
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
|
| 46 |
from sklearn.naive_bayes import BernoulliNB
|
|
@@ -103,7 +110,10 @@ def _(df, pl):
|
|
| 103 |
preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
|
| 104 |
)
|
| 105 |
sgd_pipe = make_pipeline(
|
| 106 |
-
preprocessor,
|
|
|
|
|
|
|
|
|
|
| 107 |
)
|
| 108 |
lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
|
| 109 |
bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
|
|
@@ -122,7 +132,7 @@ def _(df, pl):
|
|
| 122 |
("dtree", dtree),
|
| 123 |
],
|
| 124 |
voting="soft",
|
| 125 |
-
weights=[
|
| 126 |
),
|
| 127 |
)
|
| 128 |
bag_pipe = make_pipeline(preprocessor, bag)
|
|
@@ -335,12 +345,12 @@ def _(
|
|
| 335 |
|
| 336 |
## Gradient Boosting Classifier
|
| 337 |
|
| 338 |
-
- Accuracy: {
|
| 339 |
-
- Precision: {
|
| 340 |
-
- Recall: {
|
| 341 |
-
- F1: {
|
| 342 |
-
- ROC-AUC: {
|
| 343 |
-
- Log Loss: {
|
| 344 |
|
| 345 |
## Voting Classifier
|
| 346 |
|
|
@@ -360,10 +370,12 @@ def _(
|
|
| 360 |
- ROC-AUC: {roc_auc_score(y_test, bag_pred)}
|
| 361 |
- Log Loss: {log_loss(y_test, bag_pred)}
|
| 362 |
|
| 363 |
-
{
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
|
|
|
|
|
|
| 367 |
""")
|
| 368 |
return (
|
| 369 |
accuracy_score,
|
|
@@ -382,17 +394,65 @@ def _(
|
|
| 382 |
|
| 383 |
@app.cell
|
| 384 |
def _(mo):
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
|
| 391 |
-
mo.
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
|
| 398 |
if __name__ == "__main__":
|
|
|
|
| 20 |
|
| 21 |
@app.cell
def _(pl):
    # Load the customer-churn CSV into a Polars frame and show its summary
    # statistics as this cell's output.
    # The underscore-prefixed name is private to this cell in marimo, so it
    # does not become a notebook-wide definition.
    _csv_location = "hf://datasets/louiecerv/customer_churn/customer_churn_data.csv"
    df = pl.read_csv(_csv_location)
    # Last expression before `return` is what the cell displays.
    df.describe()
    return (df,)
|
| 28 |
|
|
|
|
| 35 |
|
| 36 |
@app.cell
|
| 37 |
def _(df, pl):
|
| 38 |
+
from sklearn.preprocessing import (
|
| 39 |
+
RobustScaler,
|
| 40 |
+
OneHotEncoder,
|
| 41 |
+
MinMaxScaler,
|
| 42 |
+
OrdinalEncoder,
|
| 43 |
+
)
|
| 44 |
from sklearn.pipeline import make_pipeline
|
| 45 |
from sklearn.compose import make_column_transformer
|
| 46 |
from sklearn.linear_model import (
|
| 47 |
LogisticRegression,
|
| 48 |
BayesianRidge,
|
| 49 |
RidgeClassifier,
|
| 50 |
+
SGDClassifier,
|
| 51 |
)
|
| 52 |
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
|
| 53 |
from sklearn.naive_bayes import BernoulliNB
|
|
|
|
| 110 |
preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
|
| 111 |
)
|
| 112 |
sgd_pipe = make_pipeline(
|
| 113 |
+
preprocessor,
|
| 114 |
+
SGDClassifier(
|
| 115 |
+
loss="hinge", penalty="l2", max_iter=10000, random_state=random_state
|
| 116 |
+
),
|
| 117 |
)
|
| 118 |
lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
|
| 119 |
bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
|
|
|
|
| 132 |
("dtree", dtree),
|
| 133 |
],
|
| 134 |
voting="soft",
|
| 135 |
+
weights=[5, 2],
|
| 136 |
),
|
| 137 |
)
|
| 138 |
bag_pipe = make_pipeline(preprocessor, bag)
|
|
|
|
| 345 |
|
| 346 |
## Gradient Boosting Classifier
|
| 347 |
|
| 348 |
+
- Accuracy: {accuracy_score(y_test, gbc_pred)}
|
| 349 |
+
- Precision: {precision_score(y_test, gbc_pred)}
|
| 350 |
+
- Recall: {recall_score(y_test, gbc_pred)}
|
| 351 |
+
- F1: {f1_score(y_test, gbc_pred)}
|
| 352 |
+
- ROC-AUC: {roc_auc_score(y_test, gbc_pred)}
|
| 353 |
+
- Log Loss: {log_loss(y_test, gbc_pred)}
|
| 354 |
|
| 355 |
## Voting Classifier
|
| 356 |
|
|
|
|
| 370 |
- ROC-AUC: {roc_auc_score(y_test, bag_pred)}
|
| 371 |
- Log Loss: {log_loss(y_test, bag_pred)}
|
| 372 |
|
| 373 |
+
{
|
| 374 |
+
mo.callout(
|
| 375 |
+
"From the metrics, the Quadratic Discriminant Analysis and the Decision Tree Classifier perform the best, thus, they were chosen for the Voting Classifier",
|
| 376 |
+
kind="info",
|
| 377 |
+
)
|
| 378 |
+
}
|
| 379 |
""")
|
| 380 |
return (
|
| 381 |
accuracy_score,
|
|
|
|
| 394 |
|
| 395 |
@app.cell
def _(mo):
    # Build the predictor's input form: three numeric fields followed by two
    # dropdowns, bundled into a single `mo.ui.dictionary` and shown stacked
    # vertically.
    #
    # All helper names are underscore-prefixed so they stay private to this
    # cell in marimo; only `user_inputs` is exported.

    # (key, label, start, stop) for each numeric field; every field steps by 1.
    _number_specs = (
        ("tenure", "Tenure", 1, 72),
        ("monthly_charges", "Monthly Charges", 20, 120),
        ("total_charges", "Total Charges", 20, 8000),
    )
    # (key, label, options) for each dropdown.
    _dropdown_specs = (
        ("contract", "Contract (Year)", ["None", "One", "Two"]),
        ("service", "Service", ["None", "Basic", "Fiber Optic"]),
    )

    # Insertion order is the display order, so numbers go in first.
    _widgets = {
        _key: mo.ui.number(start=_low, stop=_high, step=1, label=_label)
        for _key, _label, _low, _high in _number_specs
    }
    _widgets.update(
        (_key, mo.ui.dropdown(options=_options, label=_label))
        for _key, _label, _options in _dropdown_specs
    )

    user_inputs = mo.ui.dictionary(_widgets)

    # Last expression before `return` is what the cell displays.
    mo.vstack(user_inputs.values())
    return (user_inputs,)
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
@app.cell
def _(mo, pl, user_inputs, vot_pipe):
    # Turn the current form values into a one-row Polars frame, run it through
    # `vot_pipe`, and display the predicted class with its probability.
    #
    # Exports `contract`, `prediction`, `preds` and `service`. `prediction` is
    # the pair (`vot_pipe.predict(preds)`, `vot_pipe.predict_proba(preds)`).
    #
    # Helper names are underscore-prefixed so they stay private to this cell
    # in marimo and do not become notebook-wide definitions.

    # Dropdown label -> string code placed in the categorical column.
    # NOTE(review): the codes presumably mirror how the training frame encodes
    # these columns in an earlier cell -- confirm against that cell.
    _contract_codes = {
        "None": "false_false",
        "One": "true_false",
        "Two": "false_true",
    }
    _service_codes = {
        "None": "false_false",
        "Basic": "true_false",
        "Fiber Optic": "false_true",
    }

    # `.get` yields None for an empty or unrecognised selection.
    contract = _contract_codes.get(user_inputs["contract"].value)
    service = _service_codes.get(user_inputs["service"].value)

    # Halt with a prompt instead of sending a null category into the model
    # when either dropdown has no usable selection.
    mo.stop(
        contract is None or service is None,
        mo.md("Select a contract and a service to get a prediction."),
    )

    # One-element lists make the single-row shape explicit.
    preds = pl.DataFrame(
        {
            "tenure": [user_inputs["tenure"].value],
            "monthly_charges": [user_inputs["monthly_charges"].value],
            "total_charges": [user_inputs["total_charges"].value],
            "contract_One Two year": [contract],
            "internet_service_Fiber No": [service],
        }
    )

    prediction = (vot_pipe.predict(preds), vot_pipe.predict_proba(preds))

    # Formatting is done outside the f-string so it parses on Python versions
    # before 3.12, which reject reusing the enclosing quote character.
    _is_positive = bool(prediction[0][0])
    _label = "Yes" if _is_positive else "No"
    # Column 1 of the probability row is reported for a truthy prediction,
    # column 0 otherwise.
    _probability = prediction[1][0][1 if _is_positive else 0] * 100

    # Last expression before `return` is what the cell displays.
    mo.md(f"Prediction: {_label}, with about {_probability:.2f}% probability.")
    return contract, prediction, preds, service
|
| 456 |
|
| 457 |
|
| 458 |
if __name__ == "__main__":
|