"""Gradio app that predicts sleep quality with a pre-trained AutoGluon model.

On import, the module downloads a zipped AutoGluon ``TabularPredictor`` from
the Hugging Face Hub, extracts it into a local cache directory and loads it.
The Gradio UI then feeds the user's inputs to ``do_predict`` on every change.
"""

import os  # For filesystem operations
import shutil  # For directory cleanup
import zipfile  # For extracting model archives
import pathlib  # For path manipulations
from typing import Dict, Optional, Union

import pandas  # For tabular data handling
import gradio  # For interactive UI
import huggingface_hub  # For downloading model assets
import autogluon.tabular  # For loading and running AutoGluon predictors
from huggingface_hub import HfApi

# Settings
api = HfApi()
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"

# Feature column names (in the order do_predict receives them) and target column name
FEATURE_COLS = [
    'phone_hours',
    'computer_hours',
    'device_count',
    'use_before_bed',
    'sleep_time',
    'sleep_hours',
]
TARGET_COL = "sleep_quality"

# Encoding for likert questions: each label maps to its position in the list
LIKERT5_LABELS = ["Never", "Rarely", "Sometimes", "Often", "Very Often"]
LIKERT5_MAP = {label: idx for idx, label in enumerate(LIKERT5_LABELS)}

# Encoding for outcome questions: raw class code -> text shown in the UI
OUTCOME_LABELS = {
    0: "Low Sleep Quality",
    1: "High Sleep Quality",
}


# Download & load the native predictor
def _prepare_predictor_dir() -> str:
    """Download the predictor archive and extract it into a fresh directory.

    Returns:
        Path of the directory to hand to ``TabularPredictor.load``. If the
        archive unpacks to exactly one top-level directory, that directory is
        returned; otherwise the extraction directory itself is returned.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        # NOTE(review): recent huggingface_hub releases appear to deprecate
        # this argument - confirm against the pinned version before removing.
        local_dir_use_symlinks=False,
    )

    # Always start from an empty directory so files from an earlier
    # extraction cannot mix with the current archive's contents.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    contents = list(EXTRACT_DIR.iterdir())
    if len(contents) == 1 and contents[0].is_dir():
        return str(contents[0])
    return str(EXTRACT_DIR)


PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)


# A mapping utility to make it easier to encode the variables
def _human_label(c) -> str:
    """Translate a raw class value into its display label.

    The value is first tried as an integer code (so ``1``, ``1.0`` and ``"1"``
    all resolve to the same entry of ``OUTCOME_LABELS``), then as a literal
    key. Values that match neither are returned as ``str(c)``.
    """
    try:
        code = int(c)
    except (TypeError, ValueError, OverflowError):
        # Not convertible to an integer (None, text, NaN, infinity, ...).
        code = None
    if code in OUTCOME_LABELS:
        return OUTCOME_LABELS[code]

    try:
        return OUTCOME_LABELS[c]
    except (KeyError, TypeError):
        # KeyError: unknown class; TypeError: unhashable value.
        return str(c)


# This function takes all of our features, encodes them accordingly, and performs a prediction
def do_predict(
    phone_hours,
    computer_hours,
    device_count,
    use_before_bed_label,
    sleep_time,
    sleep_hours,
) -> Optional[Union[Dict[str, float], str]]:
    """Run the predictor on one set of UI inputs.

    Args:
        phone_hours: Value for the ``phone_hours`` feature (cast to float).
        computer_hours: Value for the ``computer_hours`` feature (cast to float).
        device_count: Value for the ``device_count`` feature (cast to int).
        use_before_bed_label: One of ``LIKERT5_LABELS``; encoded through
            ``LIKERT5_MAP`` before being passed to the model.
        sleep_time: Value for the ``sleep_time`` feature (cast to float).
        sleep_hours: Value for the ``sleep_hours`` feature (cast to float).

    Returns:
        A dict mapping display label to probability, sorted from most to
        least likely. If probabilities cannot be obtained, the bare predicted
        label is returned instead. ``None`` is returned when any input is
        missing.

    Raises:
        KeyError: If ``use_before_bed_label`` is not in ``LIKERT5_MAP``.
    """
    raw_inputs = (
        phone_hours,
        computer_hours,
        device_count,
        use_before_bed_label,
        sleep_time,
        sleep_hours,
    )
    # A cleared input field is delivered as None; there is nothing sensible
    # to predict from a partial row, so report "no result" instead of raising.
    if any(value is None for value in raw_inputs):
        return None

    # Note: sleep_quality is the target variable, not an input feature.
    # Values are listed in FEATURE_COLS order.
    values = (
        float(phone_hours),
        float(computer_hours),
        int(device_count),
        int(LIKERT5_MAP[use_before_bed_label]),  # Likert label -> integer code
        float(sleep_time),
        float(sleep_hours),
    )
    X = pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)

    # Best effort: a failure here is logged and handled by the fallback below.
    try:
        proba = PREDICTOR.predict_proba(X)
    except Exception as e:
        print(f"Error getting probabilities: {e}")
        proba = None

    if isinstance(proba, pandas.Series):
        proba = proba.to_frame().T

    if isinstance(proba, pandas.DataFrame) and not proba.empty:
        totals = {}
        for cls, val in proba.iloc[0].items():
            # Several raw classes may share one display label; sum them.
            key = _human_label(cls)
            totals[key] = totals.get(key, 0.0) + float(val)
        return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))

    if proba is not None:
        print("Probability DataFrame is empty or not a DataFrame.")

    # No usable probabilities: fall back to the plain predicted label.
    return _human_label(PREDICTOR.predict(X).iloc[0])


# Representative examples - one value per entry of FEATURE_COLS, in order
EXAMPLES = [
    [2.5, 4.0, 3, "Sometimes", 23.0, 7.0],  # Example 1
    [1.0, 8.0, 5, "Very Often", 1.0, 5.0],  # Example 2
    [5.0, 2.0, 2, "Never", 22.5, 8.5],  # Example 3
    [0.5, 10.0, 4, "Often", 0.0, 6.0],  # Example 4
    [3.0, 3.0, 1, "Rarely", 23.5, 7.5],  # Example 5
]

# Gradio UI
with gradio.Blocks() as demo:
    # Provide an introduction
    gradio.Markdown("# Sleep Quality Predictor")
    gradio.Markdown(
        " This app predicts sleep quality based on device usage and sleep habits."
        " Adjust the inputs below to see the predicted sleep quality. "
    )

    with gradio.Row():
        phone_hours = gradio.Slider(0, 24, step=0.1, value=2.5, label=FEATURE_COLS[0])
        computer_hours = gradio.Slider(0, 24, step=0.1, value=4.0, label=FEATURE_COLS[1])
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

    with gradio.Row():
        use_before_bed_label = gradio.Radio(choices=LIKERT5_LABELS, value="Sometimes", label=FEATURE_COLS[3])

    with gradio.Row():
        sleep_time = gradio.Slider(0, 24, step=0.1, value=23.0, label=FEATURE_COLS[4])
        sleep_hours = gradio.Slider(0, 12, step=0.1, value=7.0, label=FEATURE_COLS[5])

    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Inputs to the do_predict function, in its parameter order
    inputs = [phone_hours, computer_hours, device_count, use_before_bed_label, sleep_time, sleep_hours]

    # Re-run the prediction whenever any single input changes
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()