Spaces:

GVHD-UAE
/

GVHD_Prediction

Sleeping

App Files Community

mfarnas committed on Jul 7, 2025

Commit

ad12767

1 Parent(s): e3a752a

clean debug

Browse files

Files changed (5) hide show

src/model_utils.py +2 -85
src/model_utils_ori.py +0 -114
src/pages/1_Individual_Predictions.py +1 -4
src/pages/2_Bulk_Predictions.py +0 -4
src/sidebar.py +1 -26

src/model_utils.py CHANGED Viewed

@@ -5,12 +5,12 @@ from catboost import CatBoostClassifier
 # from lightgbm import LGBMClassifier
 from sklearn.ensemble import RandomForestClassifier
-# MODEL_DIR = Path("saved_models")
 # MODEL_DIR.mkdir(exist_ok=True)
 import yaml
-def load_model_params(model_type, mode="ensemble", path=Path("src/params") / "model_params.yaml"):
     if mode not in ["ensemble", "single_model"]:
         raise ValueError("mode must be either 'ensemble' or 'single_model'")
@@ -46,28 +46,9 @@ def get_model(model_type, mode="ensemble", best_iter=None):
     else:
         raise ValueError(f"Unsupported model type: {model_type}")
-# def save_model(model, user_model_name, metrics_result_single=None):
-#     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-#     filename = f"{timestamp}_{user_model_name}_single.pkl"
-#     filepath = MODEL_DIR / filename
-#     single_model_data = {
-#         "timestamp": timestamp,
-#         "model_name": user_model_name,
-#         "target_col": st.session_state.target_col if "target_col" in st.session_state else "UNKNOWN",
-#         "model": model,
-#         "best_iteration": st.session_state.best_iteration,
-#         "metrics_result_single": metrics_result_single
-#     }
-#     with open(filepath, "wb") as f:
-#         pickle.dump(single_model_data, f)
-#     return filename
 def save_model(model, user_model_name, metrics_result_single=None):
     from datetime import datetime
     import io
-    # import uuid
     import pickle
     import json
     import pyarrow as pa
@@ -135,33 +116,11 @@ def save_model(model, user_model_name, metrics_result_single=None):
         path_or_fileobj=buf
     )
-    print('filename SAVEEEEEEEEEEEEE', filename)
-    st.warning(f'SAVEEEEEEEEEEEEE {filename}')
     return filename
-# def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scores=None, metrics_result_ensemble=None):
-#     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-#     filename = f"{timestamp}_{user_model_name}_ensemble.pkl"
-#     filepath = MODEL_DIR / filename
-#     ensemble_data = {
-#         "timestamp": timestamp,
-#         "model_name": user_model_name,
-#         "target_col": st.session_state.target_col if "target_col" in st.session_state else "UNKNOWN",
-#         "models": models,
-#         "best_iterations": best_iterations,
-#         "fold_scores": fold_scores,
-#         "metrics_result_ensemble": metrics_result_ensemble
-#     }
-#     with open(filepath, "wb") as f:
-#         pickle.dump(ensemble_data, f)
-#     return filename
 def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scores=None, metrics_result_ensemble=None):
     from datetime import datetime
     import io
-    # import uuid
     import pickle
     import json
     import pyarrow as pa
@@ -225,21 +184,8 @@ def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scor
         path_or_fileobj=buf
     )
-    print('filename SAVEEEEEEEEEEEEEEEE', filename)
-    st.warning(f'SAVEEEEEEEEEEEEEEEEEEE {filename}')
     return filename
-# def load_model(model_name):
-#     filepath = MODEL_DIR / f"{model_name}.pkl"
-#     if not filepath.exists():
-#         raise FileNotFoundError(f"Model file not found: {filepath}")
-#     with open(filepath, "rb") as f:
-#         single_model_data = pickle.load(f)
-#     return single_model_data
 def load_model(model_name):
     from huggingface_hub import login, hf_hub_download
     import pyarrow.parquet as pq
@@ -250,18 +196,6 @@ def load_model(model_name):
     if "HF_TOKEN" in os.environ:
         login(token=os.environ["HF_TOKEN"])
-    # files = hf_hub_download(
-    #     repo_id=os.environ["HF_REPO_ID"],
-    #     repo_type="dataset",
-    #     token=os.environ["HF_TOKEN"],
-    #     filename=None,  # Get whole repo listing
-    #     cache_dir=None,
-    #     local_dir=None,
-    #     local_dir_use_symlinks=False,
-    #     force_download=False,
-    #     resume_download=True
-    # )
     from huggingface_hub import HfApi
     api = HfApi(token=os.environ["HF_TOKEN"])
     all_files = api.list_repo_files(repo_id=os.environ["HF_REPO_ID"], repo_type="dataset")
@@ -277,8 +211,6 @@ def load_model(model_name):
             token=os.environ["HF_TOKEN"]
         )
         table = pq.read_table(downloaded)
-        print("tableeeeeee")
-        st.dataframe(table)
         row = table.to_pylist()[0]
         if row["filename"] == model_name.replace("parquet", "pkl"):
             target_file = downloaded
@@ -289,26 +221,11 @@ def load_model(model_name):
     model_bytes = row["model_file"]["bytes"]
-    print("LOADDDDDDDDDDDDDDDDDDDDDDDDDD")
-    print('row["filename"]', row["filename"])
-    print('model_name.replace("parquet", "pkl")', model_name.replace("parquet", "pkl"))
     return pickle.loads(model_bytes)
-# def load_model_ensemble(filename):
-#     filepath = MODEL_DIR / f"{filename}.pkl"
-#     if not filepath.exists():
-#         raise FileNotFoundError(f"Model file not found: {filepath}")
-#     with open(filepath, "rb") as f:
-#         ensemble_data = pickle.load(f)
-#     return ensemble_data
 def load_model_ensemble(filename):
     return load_model(filename)
 def ensemble_predict(models, X, cat_features):
     preds = sum([model.predict_proba(X)[:, 1] for model in models]) / len(models)
     return preds

 # from lightgbm import LGBMClassifier
 from sklearn.ensemble import RandomForestClassifier
+MODEL_DIR = Path("src/params")
 # MODEL_DIR.mkdir(exist_ok=True)
 import yaml
+def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
     if mode not in ["ensemble", "single_model"]:
         raise ValueError("mode must be either 'ensemble' or 'single_model'")
     else:
         raise ValueError(f"Unsupported model type: {model_type}")
 def save_model(model, user_model_name, metrics_result_single=None):
     from datetime import datetime
     import io
     import pickle
     import json
     import pyarrow as pa
         path_or_fileobj=buf
     )
     return filename
 def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scores=None, metrics_result_ensemble=None):
     from datetime import datetime
     import io
     import pickle
     import json
     import pyarrow as pa
         path_or_fileobj=buf
     )
     return filename
 def load_model(model_name):
     from huggingface_hub import login, hf_hub_download
     import pyarrow.parquet as pq
     if "HF_TOKEN" in os.environ:
         login(token=os.environ["HF_TOKEN"])
     from huggingface_hub import HfApi
     api = HfApi(token=os.environ["HF_TOKEN"])
     all_files = api.list_repo_files(repo_id=os.environ["HF_REPO_ID"], repo_type="dataset")
             token=os.environ["HF_TOKEN"]
         )
         table = pq.read_table(downloaded)
         row = table.to_pylist()[0]
         if row["filename"] == model_name.replace("parquet", "pkl"):
             target_file = downloaded
     model_bytes = row["model_file"]["bytes"]
     return pickle.loads(model_bytes)
 def load_model_ensemble(filename):
     return load_model(filename)
 def ensemble_predict(models, X, cat_features):
     preds = sum([model.predict_proba(X)[:, 1] for model in models]) / len(models)
     return preds

src/model_utils_ori.py DELETED Viewed

@@ -1,114 +0,0 @@
-import streamlit as st
-import pickle
-import catboost
-from datetime import datetime
-from pathlib import Path
-from catboost import CatBoostClassifier
-# from xgboost import XGBClassifier
-# from lightgbm import LGBMClassifier
-from sklearn.ensemble import RandomForestClassifier
-MODEL_DIR = Path("saved_models")
-MODEL_DIR.mkdir(exist_ok=True)
-import yaml
-def load_model_params(model_type, mode="ensemble", path=Path("params") / "model_params.yaml"):
-    if mode not in ["ensemble", "single_model"]:
-        raise ValueError("mode must be either 'ensemble' or 'single_model'")
-    if model_type not in ["CatBoost", "XGBoost", "LightGBM", "RandomForest"]:
-        raise ValueError("model_type must be one of 'CatBoost', 'XGBoost', 'LightGBM', or 'RandomForest'")
-    with open(path, "r") as f:
-        all_params = yaml.safe_load(f)
-    params = all_params[model_type][mode]
-    if "random_seed" in params:
-        st.session_state.random_seed = params["random_seed"]
-    return params
-def get_model(model_type, mode="ensemble", best_iter=None):
-    params = load_model_params(model_type, mode)
-    # iter is set for single_model mode, where
-    if best_iter is not None:
-        params['iterations'] = best_iter
-    # if "random_seed" in st.session_state:
-    #     random_seed = st.session_state.random_seed
-    if model_type == "CatBoost":
-        return CatBoostClassifier(**params)
-    # elif model_type == "XGBoost":
-    #     return XGBClassifier(**params, use_label_encoder=False, eval_metric="logloss")
-    # elif model_type == "LightGBM":
-    #     return LGBMClassifier(**params)
-    elif model_type == "RandomForest":
-        return RandomForestClassifier(**params)
-    else:
-        raise ValueError(f"Unsupported model type: {model_type}")
-def save_model(model, user_model_name, metrics_result_single=None):
-    timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-    filename = f"{timestamp}_{user_model_name}_single.pkl"
-    filepath = MODEL_DIR / filename
-    single_model_data = {
-        "timestamp": timestamp,
-        "model_name": user_model_name,
-        "target_col": st.session_state.target_col if "target_col" in st.session_state else "UNKNOWN",
-        "model": model,
-        "best_iteration": st.session_state.best_iteration,
-        "metrics_result_single": metrics_result_single
-    }
-    with open(filepath, "wb") as f:
-        pickle.dump(single_model_data, f)
-    return filename
-def load_model(model_name):
-    filepath = MODEL_DIR / f"{model_name}.pkl"
-    if not filepath.exists():
-        raise FileNotFoundError(f"Model file not found: {filepath}")
-    with open(filepath, "rb") as f:
-        single_model_data = pickle.load(f)
-    return single_model_data
-def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scores=None, metrics_result_ensemble=None):
-    timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-    filename = f"{timestamp}_{user_model_name}_ensemble.pkl"
-    filepath = MODEL_DIR / filename
-    ensemble_data = {
-        "timestamp": timestamp,
-        "model_name": user_model_name,
-        "target_col": st.session_state.target_col if "target_col" in st.session_state else "UNKNOWN",
-        "models": models,
-        "best_iterations": best_iterations,
-        "fold_scores": fold_scores,
-        "metrics_result_ensemble": metrics_result_ensemble
-    }
-    with open(filepath, "wb") as f:
-        pickle.dump(ensemble_data, f)
-    return filename
-def load_model_ensemble(filename):
-    filepath = MODEL_DIR / f"{filename}.pkl"
-    if not filepath.exists():
-        raise FileNotFoundError(f"Model file not found: {filepath}")
-    with open(filepath, "rb") as f:
-        ensemble_data = pickle.load(f)
-    return ensemble_data
-def ensemble_predict(models, X, cat_features):
-    preds = sum([model.predict_proba(X)[:, 1] for model in models]) / len(models)
-    return preds

src/pages/1_Individual_Predictions.py CHANGED Viewed

@@ -90,7 +90,6 @@ with st.form("individual_form"):
         'CYTARABINE',
         'ETOPOSIDE',
         'FLUDARABINE',
-        # 'GEMCITABIBE',
         'GEMCITABINE',
         'MELPHALAN',
         'METHOTREXATE',
@@ -187,9 +186,7 @@ if submitted:
         pred = 0.0
     else:
         if "ensemble" in st.session_state.selected_model:
-            # ensemble = True
-            # ensemble model prediction
-            # if ensemble:
             models = load_model_ensemble(st.session_state.selected_model)
             models = models["model"]
             pred = ensemble_predict(models, X, cat_features)

         'CYTARABINE',
         'ETOPOSIDE',
         'FLUDARABINE',
         'GEMCITABINE',
         'MELPHALAN',
         'METHOTREXATE',
         pred = 0.0
     else:
         if "ensemble" in st.session_state.selected_model:
+            # ensemble prediction
             models = load_model_ensemble(st.session_state.selected_model)
             models = models["model"]
             pred = ensemble_predict(models, X, cat_features)

src/pages/2_Bulk_Predictions.py CHANGED Viewed

@@ -11,8 +11,6 @@ sidebar()
 st.title("📊 Bulk Patient Predictions")
-# training_preproc_cols = []
 uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
 if uploaded_file:
     df = pd.read_csv(uploaded_file, header=1)
@@ -21,7 +19,6 @@ if uploaded_file:
     if st.button("Preprocess"):
         df_proc = preprocess(df)
-        # print('df_proc', df_proc.columns)  # Debugging line to check processed columns
         edited_df = st.data_editor(df_proc, num_rows="dynamic")
         st.session_state.bulk_input_df = edited_df
@@ -39,7 +36,6 @@ if uploaded_file:
                     st.session_state.best_iterations = ensemble_data.get("best_iterations", [])
                     st.session_state.fold_scores = ensemble_data.get("fold_scores", [])
-                    # st.success(f"Loaded ensemble: {ensemble_data['model_name']} from {ensemble_data['timestamp']}")
                 except Exception as e:
                     st.error(f"Error loading ensemble: {str(e)}")
             else:

 st.title("📊 Bulk Patient Predictions")
 uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
 if uploaded_file:
     df = pd.read_csv(uploaded_file, header=1)
     if st.button("Preprocess"):
         df_proc = preprocess(df)
         edited_df = st.data_editor(df_proc, num_rows="dynamic")
         st.session_state.bulk_input_df = edited_df
                     st.session_state.best_iterations = ensemble_data.get("best_iterations", [])
                     st.session_state.fold_scores = ensemble_data.get("fold_scores", [])
                 except Exception as e:
                     st.error(f"Error loading ensemble: {str(e)}")
             else:

src/sidebar.py CHANGED Viewed

@@ -7,25 +7,6 @@ import pyarrow.parquet as pq
 st.session_state.orig_train_cols = ['EPI/ID numbers', 'Recipient_gender', 'Recepient_DOB', 'Recepient_Nationality', 'Hematological Diagnosis', 'Date of first diagnosis/BMBx date', 'Recepient_Blood group before HSCT', 'Donor_DOB', 'Donor_gender', 'D_Blood group', 'R_HLA_A', 'R_HLA_B', 'R_HLA_C', 'R_HLA_DR', 'R_HLA_DQ', 'D_HLA_A', 'D_HLA_B', 'D_HLA_C', 'D_HLA_DR', 'D_HLA_DQ', 'Number of lines of Rx before HSCT', 'PreHSCT conditioning regimen+/-ATG+/-TBI', 'HSCT_date', 'Source of cells', 'Donor_relation to recipient', 'HLA match ratio', 'Post HSCT regimen', 'First_GVHD prophylaxis', 'GVHD', 'Acute GVHD(<100 days)', 'Chronic GVHD>100 days', 'Acute+Chronic', 'GVHD severity', 'R_HLA_A1', 'R_HLA_A2', 'R_HLA_B1', 'R_HLA_B2', 'R_HLA_C1', 'R_HLA_C2', 'R_HLA_DR1', 'R_HLA_DR2', 'R_HLA_DQ1', 'R_HLA_DQ2', 'D_HLA_A1', 'D_HLA_A2', 'D_HLA_B1', 'D_HLA_B2', 'D_HLA_C1', 'D_HLA_C2', 'D_HLA_DR1', 'D_HLA_DR2', 'D_HLA_DQ1', 'D_HLA_DQ2', 'R_HLA_A_1', 'R_HLA_A_11', 'R_HLA_A_12', 'R_HLA_A_2', 'R_HLA_A_20', 'R_HLA_A_23', 'R_HLA_A_24', 'R_HLA_A_25', 'R_HLA_A_26', 'R_HLA_A_29', 'R_HLA_A_3', 'R_HLA_A_30', 'R_HLA_A_31', 'R_HLA_A_32', 'R_HLA_A_33', 'R_HLA_A_34', 'R_HLA_A_4', 'R_HLA_A_66', 'R_HLA_A_68', 'R_HLA_A_69', 'R_HLA_A_7', 'R_HLA_A_74', 'R_HLA_A_8', 'R_HLA_A_X', 'R_HLA_B_13', 'R_HLA_B_14', 'R_HLA_B_15', 'R_HLA_B_18', 'R_HLA_B_23', 'R_HLA_B_24', 'R_HLA_B_27', 'R_HLA_B_35', 'R_HLA_B_37', 'R_HLA_B_38', 'R_HLA_B_39', 'R_HLA_B_40', 'R_HLA_B_41', 'R_HLA_B_42', 'R_HLA_B_44', 'R_HLA_B_45', 'R_HLA_B_46', 'R_HLA_B_49', 'R_HLA_B_50', 'R_HLA_B_51', 'R_HLA_B_52', 'R_HLA_B_53', 'R_HLA_B_55', 'R_HLA_B_56', 'R_HLA_B_57', 'R_HLA_B_58', 'R_HLA_B_7', 'R_HLA_B_73', 'R_HLA_B_8', 'R_HLA_B_81', 'R_HLA_B_X', 'R_HLA_C_1', 'R_HLA_C_12', 'R_HLA_C_14', 'R_HLA_C_15', 'R_HLA_C_16', 'R_HLA_C_17', 'R_HLA_C_18', 'R_HLA_C_2', 'R_HLA_C_3', 'R_HLA_C_38', 'R_HLA_C_4', 'R_HLA_C_49', 'R_HLA_C_5', 'R_HLA_C_50', 'R_HLA_C_6', 'R_HLA_C_7', 'R_HLA_C_8', 'R_HLA_C_X', 'R_HLA_DR_1', 'R_HLA_DR_10', 'R_HLA_DR_11', 'R_HLA_DR_12', 
'R_HLA_DR_13', 'R_HLA_DR_14', 'R_HLA_DR_15', 'R_HLA_DR_16', 'R_HLA_DR_17', 'R_HLA_DR_2', 'R_HLA_DR_3', 'R_HLA_DR_4', 'R_HLA_DR_5', 'R_HLA_DR_6', 'R_HLA_DR_7', 'R_HLA_DR_8', 'R_HLA_DR_9', 'R_HLA_DR_X', 'R_HLA_DQ_1', 'R_HLA_DQ_11', 'R_HLA_DQ_15', 'R_HLA_DQ_16', 'R_HLA_DQ_2', 'R_HLA_DQ_3', 'R_HLA_DQ_301', 'R_HLA_DQ_4', 'R_HLA_DQ_5', 'R_HLA_DQ_6', 'R_HLA_DQ_7', 'R_HLA_DQ_X', 'D_HLA_A_1', 'D_HLA_A_11', 'D_HLA_A_12', 'D_HLA_A_2', 'D_HLA_A_23', 'D_HLA_A_24', 'D_HLA_A_25', 'D_HLA_A_26', 'D_HLA_A_29', 'D_HLA_A_3', 'D_HLA_A_30', 'D_HLA_A_31', 'D_HLA_A_32', 'D_HLA_A_33', 'D_HLA_A_34', 'D_HLA_A_66', 'D_HLA_A_68', 'D_HLA_A_69', 'D_HLA_A_7', 'D_HLA_A_74', 'D_HLA_A_8', 'D_HLA_A_X', 'D_HLA_B_13', 'D_HLA_B_14', 'D_HLA_B_15', 'D_HLA_B_17', 'D_HLA_B_18', 'D_HLA_B_23', 'D_HLA_B_24', 'D_HLA_B_27', 'D_HLA_B_35', 'D_HLA_B_37', 'D_HLA_B_38', 'D_HLA_B_39', 'D_HLA_B_40', 'D_HLA_B_41', 'D_HLA_B_42', 'D_HLA_B_44', 'D_HLA_B_45', 'D_HLA_B_48', 'D_HLA_B_49', 'D_HLA_B_50', 'D_HLA_B_51', 'D_HLA_B_52', 'D_HLA_B_53', 'D_HLA_B_55', 'D_HLA_B_56', 'D_HLA_B_57', 'D_HLA_B_58', 'D_HLA_B_7', 'D_HLA_B_73', 'D_HLA_B_8', 'D_HLA_B_81', 'D_HLA_B_X', 'D_HLA_C_1', 'D_HLA_C_12', 'D_HLA_C_14', 'D_HLA_C_15', 'D_HLA_C_16', 'D_HLA_C_17', 'D_HLA_C_18', 'D_HLA_C_2', 'D_HLA_C_3', 'D_HLA_C_38', 'D_HLA_C_4', 'D_HLA_C_49', 'D_HLA_C_5', 'D_HLA_C_50', 'D_HLA_C_6', 'D_HLA_C_7', 'D_HLA_C_8', 'D_HLA_C_X', 'D_HLA_DR_1', 'D_HLA_DR_10', 'D_HLA_DR_11', 'D_HLA_DR_12', 'D_HLA_DR_13', 'D_HLA_DR_14', 'D_HLA_DR_15', 'D_HLA_DR_16', 'D_HLA_DR_17', 'D_HLA_DR_2', 'D_HLA_DR_3', 'D_HLA_DR_4', 'D_HLA_DR_5', 'D_HLA_DR_6', 'D_HLA_DR_7', 'D_HLA_DR_8', 'D_HLA_DR_9', 'D_HLA_DR_X', 'D_HLA_DQ_1', 'D_HLA_DQ_11', 'D_HLA_DQ_15', 'D_HLA_DQ_16', 'D_HLA_DQ_2', 'D_HLA_DQ_3', 'D_HLA_DQ_301', 'D_HLA_DQ_4', 'D_HLA_DQ_5', 'D_HLA_DQ_6', 'D_HLA_DQ_7', 'D_HLA_DQ_X', 'Recepient_DOB_Year', 'Donor_DOB_Year', 'HSCT_date_Year', 'R_Age_at_transplant', 'D_Age_at_transplant', 'Age_Gap_R_D', 'PreHSCT_ALEMTUZUMAB', 'PreHSCT_ATG', 'PreHSCT_BEAM', 'PreHSCT_BUSULFAN', 
'PreHSCT_CAMPATH', 'PreHSCT_CARMUSTINE', 'PreHSCT_CLOFARABINE', 'PreHSCT_CYCLOPHOSPHAMIDE', 'PreHSCT_CYCLOSPORIN', 'PreHSCT_CYTARABINE', 'PreHSCT_ETOPOSIDE', 'PreHSCT_FLUDARABINE', 'PreHSCT_GEMCITABINE', 'PreHSCT_MELPHALAN', 'PreHSCT_METHOTREXATE', 'PreHSCT_OTHER', 'PreHSCT_RANIMUSTINE', 'PreHSCT_REDUCEDCONDITIONING', 'PreHSCT_RITUXIMAB', 'PreHSCT_SIROLIMUS', 'PreHSCT_TBI', 'PreHSCT_THIOTEPA', 'PreHSCT_TREOSULFAN', 'PreHSCT_UA', 'PreHSCT_VORNOSTAT', 'PreHSCT_X', 'First_GVHD_prophylaxis_ABATACEPT', 'First_GVHD_prophylaxis_ALEMTUZUMAB', 'First_GVHD_prophylaxis_ATG', 'First_GVHD_prophylaxis_CYCLOPHOSPHAMIDE', 'First_GVHD_prophylaxis_CYCLOSPORIN', 'First_GVHD_prophylaxis_IMATINIB', 'First_GVHD_prophylaxis_LEFLUNOMIDE', 'First_GVHD_prophylaxis_METHOTREXATE', 'First_GVHD_prophylaxis_MMF', 'First_GVHD_prophylaxis_NONE', 'First_GVHD_prophylaxis_RUXOLITINIB', 'First_GVHD_prophylaxis_SIROLIMUS', 'First_GVHD_prophylaxis_STEROID', 'First_GVHD_prophylaxis_TAC', 'First_GVHD_prophylaxis_TACROLIMUS', 'First_GVHD_prophylaxis_X', 'Recepient_Blood group before HSCT_MergePlusMinus', 'D_Blood group_MergePlusMinus', 'R_Age_at_transplant_cutoff16', 'R_Age_at_transplant_cutoff18', 'D_Age_at_transplant_cutoff16', 'D_Age_at_transplant_cutoff18', 'Relation_and_Recipient_Gender', 'Relation_and_Donor_Gender', 'Relation_and_Recipient_and_Donor_Gender', 'Recepient_Nationality_Geographical', 'Recepient_Nationality_Cultural', 'Recepient_Nationality_Regional_Income', 'Recepient_Nationality_Regional_WHO', 'Hematological Diagnosis_Grouped', 'Hematological Diagnosis_Malignant', 'PreHSCT_MTX', 'First_GVHD_prophylaxis_MTX']
-# def sidebar():
-#     APP_DIR = Path(__file__).parent
-#     MODELS_DIR = APP_DIR / "saved_models"
-#     # Shared dropdown in the sidebar
-#     def get_model_options():
-#         models = ["Default"]
-#         model_files = glob.glob(str(MODELS_DIR / "*.pkl")) + glob.glob(str(MODELS_DIR / "*.cbm"))
-#         for m in model_files:
-#             models.append(Path(m).stem)
-#         return sorted(set(models))
-#     if 'selected_model' not in st.session_state:
-#         st.session_state.selected_model = "Default"
-#     st.sidebar.title("Model Selection")
-#     st.session_state.selected_model = st.sidebar.selectbox("Model", get_model_options())
 def sidebar():
     def get_model_options():
         models = ["Default_ensemble"]
@@ -48,16 +29,10 @@ def sidebar():
             except Exception as e:
                 st.warning(f"Skipping model file due to error: {f} ({e})")
-        # todel
-        print(sorted(set(models)))
-        st.warning(sorted(set(models)))
         return sorted(set(models))
     if 'selected_model' not in st.session_state:
         st.session_state.selected_model = "Default_ensemble"
     st.sidebar.title("Model Selection")
-    st.session_state.selected_model = st.sidebar.selectbox("Model", get_model_options())
-    # todel
-    st.info(f"{st.session_state.selected_model} is chosen!")

 st.session_state.orig_train_cols = ['EPI/ID numbers', 'Recipient_gender', 'Recepient_DOB', 'Recepient_Nationality', 'Hematological Diagnosis', 'Date of first diagnosis/BMBx date', 'Recepient_Blood group before HSCT', 'Donor_DOB', 'Donor_gender', 'D_Blood group', 'R_HLA_A', 'R_HLA_B', 'R_HLA_C', 'R_HLA_DR', 'R_HLA_DQ', 'D_HLA_A', 'D_HLA_B', 'D_HLA_C', 'D_HLA_DR', 'D_HLA_DQ', 'Number of lines of Rx before HSCT', 'PreHSCT conditioning regimen+/-ATG+/-TBI', 'HSCT_date', 'Source of cells', 'Donor_relation to recipient', 'HLA match ratio', 'Post HSCT regimen', 'First_GVHD prophylaxis', 'GVHD', 'Acute GVHD(<100 days)', 'Chronic GVHD>100 days', 'Acute+Chronic', 'GVHD severity', 'R_HLA_A1', 'R_HLA_A2', 'R_HLA_B1', 'R_HLA_B2', 'R_HLA_C1', 'R_HLA_C2', 'R_HLA_DR1', 'R_HLA_DR2', 'R_HLA_DQ1', 'R_HLA_DQ2', 'D_HLA_A1', 'D_HLA_A2', 'D_HLA_B1', 'D_HLA_B2', 'D_HLA_C1', 'D_HLA_C2', 'D_HLA_DR1', 'D_HLA_DR2', 'D_HLA_DQ1', 'D_HLA_DQ2', 'R_HLA_A_1', 'R_HLA_A_11', 'R_HLA_A_12', 'R_HLA_A_2', 'R_HLA_A_20', 'R_HLA_A_23', 'R_HLA_A_24', 'R_HLA_A_25', 'R_HLA_A_26', 'R_HLA_A_29', 'R_HLA_A_3', 'R_HLA_A_30', 'R_HLA_A_31', 'R_HLA_A_32', 'R_HLA_A_33', 'R_HLA_A_34', 'R_HLA_A_4', 'R_HLA_A_66', 'R_HLA_A_68', 'R_HLA_A_69', 'R_HLA_A_7', 'R_HLA_A_74', 'R_HLA_A_8', 'R_HLA_A_X', 'R_HLA_B_13', 'R_HLA_B_14', 'R_HLA_B_15', 'R_HLA_B_18', 'R_HLA_B_23', 'R_HLA_B_24', 'R_HLA_B_27', 'R_HLA_B_35', 'R_HLA_B_37', 'R_HLA_B_38', 'R_HLA_B_39', 'R_HLA_B_40', 'R_HLA_B_41', 'R_HLA_B_42', 'R_HLA_B_44', 'R_HLA_B_45', 'R_HLA_B_46', 'R_HLA_B_49', 'R_HLA_B_50', 'R_HLA_B_51', 'R_HLA_B_52', 'R_HLA_B_53', 'R_HLA_B_55', 'R_HLA_B_56', 'R_HLA_B_57', 'R_HLA_B_58', 'R_HLA_B_7', 'R_HLA_B_73', 'R_HLA_B_8', 'R_HLA_B_81', 'R_HLA_B_X', 'R_HLA_C_1', 'R_HLA_C_12', 'R_HLA_C_14', 'R_HLA_C_15', 'R_HLA_C_16', 'R_HLA_C_17', 'R_HLA_C_18', 'R_HLA_C_2', 'R_HLA_C_3', 'R_HLA_C_38', 'R_HLA_C_4', 'R_HLA_C_49', 'R_HLA_C_5', 'R_HLA_C_50', 'R_HLA_C_6', 'R_HLA_C_7', 'R_HLA_C_8', 'R_HLA_C_X', 'R_HLA_DR_1', 'R_HLA_DR_10', 'R_HLA_DR_11', 'R_HLA_DR_12', 
'R_HLA_DR_13', 'R_HLA_DR_14', 'R_HLA_DR_15', 'R_HLA_DR_16', 'R_HLA_DR_17', 'R_HLA_DR_2', 'R_HLA_DR_3', 'R_HLA_DR_4', 'R_HLA_DR_5', 'R_HLA_DR_6', 'R_HLA_DR_7', 'R_HLA_DR_8', 'R_HLA_DR_9', 'R_HLA_DR_X', 'R_HLA_DQ_1', 'R_HLA_DQ_11', 'R_HLA_DQ_15', 'R_HLA_DQ_16', 'R_HLA_DQ_2', 'R_HLA_DQ_3', 'R_HLA_DQ_301', 'R_HLA_DQ_4', 'R_HLA_DQ_5', 'R_HLA_DQ_6', 'R_HLA_DQ_7', 'R_HLA_DQ_X', 'D_HLA_A_1', 'D_HLA_A_11', 'D_HLA_A_12', 'D_HLA_A_2', 'D_HLA_A_23', 'D_HLA_A_24', 'D_HLA_A_25', 'D_HLA_A_26', 'D_HLA_A_29', 'D_HLA_A_3', 'D_HLA_A_30', 'D_HLA_A_31', 'D_HLA_A_32', 'D_HLA_A_33', 'D_HLA_A_34', 'D_HLA_A_66', 'D_HLA_A_68', 'D_HLA_A_69', 'D_HLA_A_7', 'D_HLA_A_74', 'D_HLA_A_8', 'D_HLA_A_X', 'D_HLA_B_13', 'D_HLA_B_14', 'D_HLA_B_15', 'D_HLA_B_17', 'D_HLA_B_18', 'D_HLA_B_23', 'D_HLA_B_24', 'D_HLA_B_27', 'D_HLA_B_35', 'D_HLA_B_37', 'D_HLA_B_38', 'D_HLA_B_39', 'D_HLA_B_40', 'D_HLA_B_41', 'D_HLA_B_42', 'D_HLA_B_44', 'D_HLA_B_45', 'D_HLA_B_48', 'D_HLA_B_49', 'D_HLA_B_50', 'D_HLA_B_51', 'D_HLA_B_52', 'D_HLA_B_53', 'D_HLA_B_55', 'D_HLA_B_56', 'D_HLA_B_57', 'D_HLA_B_58', 'D_HLA_B_7', 'D_HLA_B_73', 'D_HLA_B_8', 'D_HLA_B_81', 'D_HLA_B_X', 'D_HLA_C_1', 'D_HLA_C_12', 'D_HLA_C_14', 'D_HLA_C_15', 'D_HLA_C_16', 'D_HLA_C_17', 'D_HLA_C_18', 'D_HLA_C_2', 'D_HLA_C_3', 'D_HLA_C_38', 'D_HLA_C_4', 'D_HLA_C_49', 'D_HLA_C_5', 'D_HLA_C_50', 'D_HLA_C_6', 'D_HLA_C_7', 'D_HLA_C_8', 'D_HLA_C_X', 'D_HLA_DR_1', 'D_HLA_DR_10', 'D_HLA_DR_11', 'D_HLA_DR_12', 'D_HLA_DR_13', 'D_HLA_DR_14', 'D_HLA_DR_15', 'D_HLA_DR_16', 'D_HLA_DR_17', 'D_HLA_DR_2', 'D_HLA_DR_3', 'D_HLA_DR_4', 'D_HLA_DR_5', 'D_HLA_DR_6', 'D_HLA_DR_7', 'D_HLA_DR_8', 'D_HLA_DR_9', 'D_HLA_DR_X', 'D_HLA_DQ_1', 'D_HLA_DQ_11', 'D_HLA_DQ_15', 'D_HLA_DQ_16', 'D_HLA_DQ_2', 'D_HLA_DQ_3', 'D_HLA_DQ_301', 'D_HLA_DQ_4', 'D_HLA_DQ_5', 'D_HLA_DQ_6', 'D_HLA_DQ_7', 'D_HLA_DQ_X', 'Recepient_DOB_Year', 'Donor_DOB_Year', 'HSCT_date_Year', 'R_Age_at_transplant', 'D_Age_at_transplant', 'Age_Gap_R_D', 'PreHSCT_ALEMTUZUMAB', 'PreHSCT_ATG', 'PreHSCT_BEAM', 'PreHSCT_BUSULFAN', 
'PreHSCT_CAMPATH', 'PreHSCT_CARMUSTINE', 'PreHSCT_CLOFARABINE', 'PreHSCT_CYCLOPHOSPHAMIDE', 'PreHSCT_CYCLOSPORIN', 'PreHSCT_CYTARABINE', 'PreHSCT_ETOPOSIDE', 'PreHSCT_FLUDARABINE', 'PreHSCT_GEMCITABINE', 'PreHSCT_MELPHALAN', 'PreHSCT_METHOTREXATE', 'PreHSCT_OTHER', 'PreHSCT_RANIMUSTINE', 'PreHSCT_REDUCEDCONDITIONING', 'PreHSCT_RITUXIMAB', 'PreHSCT_SIROLIMUS', 'PreHSCT_TBI', 'PreHSCT_THIOTEPA', 'PreHSCT_TREOSULFAN', 'PreHSCT_UA', 'PreHSCT_VORNOSTAT', 'PreHSCT_X', 'First_GVHD_prophylaxis_ABATACEPT', 'First_GVHD_prophylaxis_ALEMTUZUMAB', 'First_GVHD_prophylaxis_ATG', 'First_GVHD_prophylaxis_CYCLOPHOSPHAMIDE', 'First_GVHD_prophylaxis_CYCLOSPORIN', 'First_GVHD_prophylaxis_IMATINIB', 'First_GVHD_prophylaxis_LEFLUNOMIDE', 'First_GVHD_prophylaxis_METHOTREXATE', 'First_GVHD_prophylaxis_MMF', 'First_GVHD_prophylaxis_NONE', 'First_GVHD_prophylaxis_RUXOLITINIB', 'First_GVHD_prophylaxis_SIROLIMUS', 'First_GVHD_prophylaxis_STEROID', 'First_GVHD_prophylaxis_TAC', 'First_GVHD_prophylaxis_TACROLIMUS', 'First_GVHD_prophylaxis_X', 'Recepient_Blood group before HSCT_MergePlusMinus', 'D_Blood group_MergePlusMinus', 'R_Age_at_transplant_cutoff16', 'R_Age_at_transplant_cutoff18', 'D_Age_at_transplant_cutoff16', 'D_Age_at_transplant_cutoff18', 'Relation_and_Recipient_Gender', 'Relation_and_Donor_Gender', 'Relation_and_Recipient_and_Donor_Gender', 'Recepient_Nationality_Geographical', 'Recepient_Nationality_Cultural', 'Recepient_Nationality_Regional_Income', 'Recepient_Nationality_Regional_WHO', 'Hematological Diagnosis_Grouped', 'Hematological Diagnosis_Malignant', 'PreHSCT_MTX', 'First_GVHD_prophylaxis_MTX']
 def sidebar():
     def get_model_options():
         models = ["Default_ensemble"]
             except Exception as e:
                 st.warning(f"Skipping model file due to error: {f} ({e})")
         return sorted(set(models))
     if 'selected_model' not in st.session_state:
         st.session_state.selected_model = "Default_ensemble"
     st.sidebar.title("Model Selection")
+    st.session_state.selected_model = st.sidebar.selectbox("Model", get_model_options())