Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| from sklearn.linear_model import LogisticRegression, SGDClassifier | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.naive_bayes import GaussianNB | |
| from xgboost import XGBClassifier | |
| from sklearn.metrics import ( | |
| accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss, | |
| ) | |
| import shap | |
def train_tree_models(_X_train, _y_train):
    """Fit a random forest and an XGBoost classifier on the same training data.

    Args:
        _X_train: Training feature matrix passed unchanged to each ``fit``.
        _y_train: Training labels passed unchanged to each ``fit``.

    Returns:
        dict mapping ``"Random Forest"`` and ``"XGBoost"`` to the fitted
        estimators, in that order.

    NOTE(review): the leading underscores on the parameter names look like
    Streamlit's convention for excluding arguments from cache hashing --
    confirm whether a cache decorator is meant to wrap this function.
    """
    xgb_params = {
        "n_estimators": 200,
        "max_depth": 5,
        "learning_rate": 0.1,
        "random_state": 42,
        "eval_metric": "logloss",
    }
    # Both estimators are constructed before either one is trained.
    forest = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    booster = XGBClassifier(**xgb_params)

    fitted = {}
    for label, estimator in (("Random Forest", forest), ("XGBoost", booster)):
        fitted[label] = estimator
    for estimator in fitted.values():
        estimator.fit(_X_train, _y_train)
    return fitted
def train_lr_model(_X_train_oh, _y_train):
    """Fit a logistic-regression classifier and return it.

    Args:
        _X_train_oh: Training feature matrix (the ``_oh`` suffix suggests a
            one-hot encoded variant -- confirm against the caller).
        _y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` (``max_iter=2000``,
        ``random_state=42``).
    """
    classifier = LogisticRegression(random_state=42, max_iter=2000)
    # scikit-learn's ``fit`` returns the estimator itself.
    return classifier.fit(_X_train_oh, _y_train)
def train_nb_model(_X_train, _y_train):
    """Fit a Gaussian Naive Bayes classifier and return it.

    Naive Bayes treats the features as independent of one another.

    Args:
        _X_train: Training feature matrix.
        _y_train: Training labels.

    Returns:
        The fitted ``GaussianNB`` estimator (default hyper-parameters).
    """
    classifier = GaussianNB()
    # scikit-learn's ``fit`` returns the estimator itself.
    return classifier.fit(_X_train, _y_train)
def evaluate_model(model, X_test, y_test) -> dict:
    """Score a fitted classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict`` and either
            ``predict_proba`` or ``decision_function``.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        dict with the keys ``"Accuracy"``, ``"Precision"``, ``"Recall"``,
        ``"F1 Score"`` and ``"AUC"``, each a built-in ``float``.
    """
    predictions = model.predict(X_test)

    # AUC is computed from a continuous score: column 1 of ``predict_proba``
    # when the model has it, otherwise the raw ``decision_function`` output.
    if not hasattr(model, "predict_proba"):
        scores = model.decision_function(X_test)
    else:
        scores = model.predict_proba(X_test)[:, 1]

    metrics = {"Accuracy": float(accuracy_score(y_test, predictions))}
    # These three share a signature; zero_division=0 yields 0 for an
    # undefined ratio.
    thresholded_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    for label, scorer in thresholded_scorers:
        metrics[label] = float(scorer(y_test, predictions, zero_division=0))
    metrics["AUC"] = float(roc_auc_score(y_test, scores))
    return metrics
def evaluate_all_models(models: dict, X_test, y_test) -> dict:
    """Run ``evaluate_model`` on every estimator in ``models``.

    Args:
        models: Mapping of display name to fitted estimator.
        X_test: Test feature matrix shared by all models.
        y_test: True labels shared by all models.

    Returns:
        dict mapping each name in ``models`` to that model's metrics dict,
        in the same iteration order as ``models``.
    """
    report = {}
    for label, estimator in models.items():
        report[label] = evaluate_model(estimator, X_test, y_test)
    return report
def get_shap_explainer(_model, _X_train):
    """Create a SHAP tree explainer and explain the training data with it.

    Args:
        _model: Fitted model handed to ``shap.TreeExplainer``.
        _X_train: Data the explainer is applied to.

    Returns:
        Tuple ``(explainer, shap_values)``: the ``TreeExplainer`` and the
        result of calling it on ``_X_train``.
    """
    tree_explainer = shap.TreeExplainer(_model)
    return tree_explainer, tree_explainer(_X_train)
def get_shap_single(explainer, X_single):
    """Apply ``explainer`` to ``X_single`` and return whatever it produces.

    Args:
        explainer: Callable explainer (for example the one returned by
            ``get_shap_explainer``).
        X_single: Input to explain; passed through unchanged.

    Returns:
        The result of ``explainer(X_single)``.
    """
    explanation = explainer(X_single)
    return explanation
def create_sgd_model(classes=None, *, n_features=1):
    """Build an ``SGDClassifier`` with logistic loss for incremental training.

    Args:
        classes: Optional array-like holding every class label. When given,
            the model is primed with one ``partial_fit`` call on a single
            all-zero sample labelled ``classes[0]`` so the full label set is
            registered up front.
        n_features: Width of the all-zero priming sample; only used when
            ``classes`` is given. The priming call fixes the number of input
            features the model accepts afterwards, so pass the feature count
            of the data that will be fed in later. Defaults to 1, the width
            that was previously hard-coded.

    Returns:
        The ``SGDClassifier`` instance (primed when ``classes`` is given,
        otherwise unfitted).

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    if n_features < 1:
        raise ValueError(f"n_features must be >= 1, got {n_features}")

    model = SGDClassifier(
        loss="log_loss", penalty="l2", alpha=0.0001,
        random_state=42, warm_start=False,
    )
    if classes is not None:
        # The priming sample must have the same width as the real data:
        # later partial_fit/predict calls are checked against it.
        # NOTE(review): even an all-zero sample may nudge the intercept
        # towards classes[0] -- confirm this is acceptable for the caller.
        primer_features = np.zeros((1, n_features))
        primer_labels = np.array([classes[0]])
        model.partial_fit(primer_features, primer_labels, classes=classes)
    return model
def evaluate_streaming(model, X_test, y_test) -> dict:
    """Score an incrementally trained classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        dict with the keys ``"Accuracy"``, ``"F1 Score"`` and ``"Log Loss"``.
        Each value is cast to a built-in ``float``, matching
        ``evaluate_model``, so callers never receive NumPy scalar types.

    NOTE(review): ``log_loss`` is called without ``labels``; it may raise if
    ``y_test`` holds only one class -- confirm callers always pass both.
    """
    y_pred = model.predict(X_test)
    # Column 1 of predict_proba -- presumably the positive class in a binary
    # setup; verify against the label encoding used by the caller.
    y_proba = model.predict_proba(X_test)[:, 1]
    return {
        "Accuracy": float(accuracy_score(y_test, y_pred)),
        "F1 Score": float(f1_score(y_test, y_pred, zero_division=0)),
        "Log Loss": float(log_loss(y_test, y_proba)),
    }