computerscience-person committed on
Commit
6df7a65
·
1 Parent(s): 4ca68c5

Add predictor portion.

Browse files
Files changed (1) hide show
  1. app.py +85 -25
app.py CHANGED
@@ -20,7 +20,9 @@ def _():
20
 
21
  @app.cell
22
  def _(pl):
23
- df = pl.read_csv('hf://datasets/louiecerv/customer_churn/customer_churn_data.csv')
 
 
24
  df.describe()
25
  return (df,)
26
 
@@ -33,14 +35,19 @@ def _(df):
33
 
34
  @app.cell
35
  def _(df, pl):
36
- from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler, OrdinalEncoder
 
 
 
 
 
37
  from sklearn.pipeline import make_pipeline
38
  from sklearn.compose import make_column_transformer
39
  from sklearn.linear_model import (
40
  LogisticRegression,
41
  BayesianRidge,
42
  RidgeClassifier,
43
- SGDClassifier
44
  )
45
  from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
46
  from sklearn.naive_bayes import BernoulliNB
@@ -103,7 +110,10 @@ def _(df, pl):
103
  preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
104
  )
105
  sgd_pipe = make_pipeline(
106
- preprocessor, SGDClassifier(loss='hinge', penalty='l2', max_iter=10000, random_state=random_state)
 
 
 
107
  )
108
  lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
109
  bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
@@ -122,7 +132,7 @@ def _(df, pl):
122
  ("dtree", dtree),
123
  ],
124
  voting="soft",
125
- weights=[3, 2],
126
  ),
127
  )
128
  bag_pipe = make_pipeline(preprocessor, bag)
@@ -335,12 +345,12 @@ def _(
335
 
336
  ## Gradient Boosting Classifier
337
 
338
- - Accuracy: { accuracy_score(y_test, gbc_pred) }
339
- - Precision: { precision_score(y_test, gbc_pred) }
340
- - Recall: { recall_score(y_test, gbc_pred) }
341
- - F1: { f1_score(y_test, gbc_pred) }
342
- - ROC-AUC: { roc_auc_score(y_test, gbc_pred) }
343
- - Log Loss: { log_loss(y_test, gbc_pred) }
344
 
345
  ## Voting Classifier
346
 
@@ -360,10 +370,12 @@ def _(
360
  - ROC-AUC: {roc_auc_score(y_test, bag_pred)}
361
  - Log Loss: {log_loss(y_test, bag_pred)}
362
 
363
- {mo.callout(
364
- "From the metrics, the Quadratic Discriminant Analysis and the Decision Tree Classifier perform the best, thus, they were chosen for the Voting Classifier",
365
- kind='info'
366
- )}
 
 
367
  """)
368
  return (
369
  accuracy_score,
@@ -382,17 +394,65 @@ def _(
382
 
383
  @app.cell
384
  def _(mo):
385
- preds = {
386
- 'tenure': mo.ui.number(start=1, stop=72, step=1),
387
- 'monthly_charges': mo.ui.number(start=20, stop=120, step=1),
388
-
389
- }
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
- mo.md(f"""
392
- Tenure: {preds['tenure']}
393
- Monthy Charge: {preds['monthly_charges']}
394
- """)
395
- return (preds,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
 
398
  if __name__ == "__main__":
 
20
 
21
  @app.cell
22
  def _(pl):
23
+ df = pl.read_csv(
24
+ "hf://datasets/louiecerv/customer_churn/customer_churn_data.csv"
25
+ )
26
  df.describe()
27
  return (df,)
28
 
 
35
 
36
  @app.cell
37
  def _(df, pl):
38
+ from sklearn.preprocessing import (
39
+ RobustScaler,
40
+ OneHotEncoder,
41
+ MinMaxScaler,
42
+ OrdinalEncoder,
43
+ )
44
  from sklearn.pipeline import make_pipeline
45
  from sklearn.compose import make_column_transformer
46
  from sklearn.linear_model import (
47
  LogisticRegression,
48
  BayesianRidge,
49
  RidgeClassifier,
50
+ SGDClassifier,
51
  )
52
  from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
53
  from sklearn.naive_bayes import BernoulliNB
 
110
  preprocessor, RidgeClassifier(max_iter=10000, random_state=random_state)
111
  )
112
  sgd_pipe = make_pipeline(
113
+ preprocessor,
114
+ SGDClassifier(
115
+ loss="hinge", penalty="l2", max_iter=10000, random_state=random_state
116
+ ),
117
  )
118
  lda_pipe = make_pipeline(preprocessor, QuadraticDiscriminantAnalysis())
119
  bnb_pipe = make_pipeline(preprocessor, BernoulliNB())
 
132
  ("dtree", dtree),
133
  ],
134
  voting="soft",
135
+ weights=[5, 2],
136
  ),
137
  )
138
  bag_pipe = make_pipeline(preprocessor, bag)
 
345
 
346
  ## Gradient Boosting Classifier
347
 
348
+ - Accuracy: {accuracy_score(y_test, gbc_pred)}
349
+ - Precision: {precision_score(y_test, gbc_pred)}
350
+ - Recall: {recall_score(y_test, gbc_pred)}
351
+ - F1: {f1_score(y_test, gbc_pred)}
352
+ - ROC-AUC: {roc_auc_score(y_test, gbc_pred)}
353
+ - Log Loss: {log_loss(y_test, gbc_pred)}
354
 
355
  ## Voting Classifier
356
 
 
370
  - ROC-AUC: {roc_auc_score(y_test, bag_pred)}
371
  - Log Loss: {log_loss(y_test, bag_pred)}
372
 
373
+ {
374
+ mo.callout(
375
+ "From the metrics, the Quadratic Discriminant Analysis and the Decision Tree Classifier perform the best, thus, they were chosen for the Voting Classifier",
376
+ kind="info",
377
+ )
378
+ }
379
  """)
380
  return (
381
  accuracy_score,
 
394
 
395
  @app.cell
396
  def _(mo):
397
+ user_inputs = mo.ui.dictionary(
398
+ {
399
+ "tenure": mo.ui.number(label="Tenure", start=1, stop=72, step=1),
400
+ "monthly_charges": mo.ui.number(
401
+ label="Monthly Charges", start=20, stop=120, step=1
402
+ ),
403
+ "total_charges": mo.ui.number(
404
+ label="Total Charges", start=20, stop=8000, step=1
405
+ ),
406
+ "contract": mo.ui.dropdown(
407
+ label="Contract (Year)", options=["None", "One", "Two"]
408
+ ),
409
+ "service": mo.ui.dropdown(
410
+ label="Service", options=["None", "Basic", "Fiber Optic"]
411
+ ),
412
+ }
413
+ )
414
 
415
+ mo.vstack(user_inputs.values())
416
+ return (user_inputs,)
417
+
418
+
419
+ @app.cell
420
+ def _(mo, pl, user_inputs, vot_pipe):
421
+ contract = None
422
+ service = None
423
+
424
+ match user_inputs["contract"].value:
425
+ case "None":
426
+ contract = "false_false"
427
+ case "One":
428
+ contract = "true_false"
429
+ case "Two":
430
+ contract = "false_true"
431
+ case _:
432
+ pass
433
+
434
+ match user_inputs["service"].value:
435
+ case "None":
436
+ service = "false_false"
437
+ case "Basic":
438
+ service = "true_false"
439
+ case "Fiber Optic":
440
+ service = "false_true"
441
+ case _:
442
+ pass
443
+
444
+ preds = pl.DataFrame({
445
+ "tenure": user_inputs["tenure"].value,
446
+ "monthly_charges": user_inputs["monthly_charges"].value,
447
+ "total_charges": user_inputs["total_charges"].value,
448
+ "contract_One Two year": contract,
449
+ "internet_service_Fiber No": service,
450
+ })
451
+
452
+ prediction = (vot_pipe.predict(preds), vot_pipe.predict_proba(preds))
453
+
454
+ mo.md(f"Prediction: {"Yes" if prediction[0][0] else "No" }, with about {prediction[1][0][0] * 100 if not prediction[0][0] else prediction[1][0][1] * 100:.2f}% probability.")
455
+ return contract, prediction, preds, service
456
 
457
 
458
  if __name__ == "__main__":