Spaces:
Sleeping
Sleeping
| import os # For filesystem operations | |
| import shutil # For directory cleanup | |
| import zipfile # For extracting model archives | |
| import pathlib # For path manipulations | |
| import pandas # For tabular data handling | |
| import gradio # For interactive UI | |
| import huggingface_hub # For downloading model assets | |
| import autogluon.tabular # For loading and running AutoGluon predictors | |
| from huggingface_hub import HfApi | |
# Settings: Hub client, location of the zipped predictor, and local cache layout
api = HfApi()

# Hub repository and the archive file inside it that holds the predictor
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Download target, and the sub-directory the archive is unpacked into
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")
# Input column names, in the same order as the arguments of do_predict
FEATURE_COLS = [
    "phone_hours",
    "computer_hours",
    "device_count",
    "use_before_bed",
    "sleep_time",
    "sleep_hours",
]

# Name of the target column (filtered out of the prediction input frame)
TARGET_COL = "sleep_quality"
# Encoding for Likert questions: the five answer options in display order
LIKERT5_LABELS = ["Never", "Rarely", "Sometimes", "Often", "Very Often"]

# Each answer is encoded as its position in LIKERT5_LABELS (0 through 4)
LIKERT5_MAP = dict(zip(LIKERT5_LABELS, range(len(LIKERT5_LABELS))))

# Encoding for outcome questions: class code -> text shown to the user
OUTCOME_LABELS = {0: "Low Sleep Quality", 1: "High Sleep Quality"}
# Download & unpack the native predictor
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor and unpack it into a fresh directory.

    The archive ``ZIP_FILENAME`` is fetched from ``MODEL_REPO_ID`` into
    ``CACHE_DIR`` and extracted into ``EXTRACT_DIR``, which is wiped first.

    Returns:
        The path to load the predictor from: the single top-level directory
        of the archive if there is exactly one, otherwise ``EXTRACT_DIR``.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Start from an empty directory so files left by an earlier extraction
    # cannot mix with the freshly unpacked ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # Archiver metadata (e.g. added when zipping on macOS) is not part of the
    # predictor; counting it would hide a single wrapping directory.
    archiver_metadata = {"__MACOSX", ".DS_Store"}
    contents = [
        entry for entry in EXTRACT_DIR.iterdir()
        if entry.name not in archiver_metadata
    ]

    if len(contents) == 1 and contents[0].is_dir():
        # The archive wrapped everything in one folder: that folder is the root.
        return str(contents[0])
    return str(EXTRACT_DIR)
# Fetch and unpack the model when the module is imported, then load it once
# so every prediction reuses the same predictor object.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
)
# A mapping utility that turns a raw class value into its display label
def _human_label(c):
    """Return the human-readable label for the class value ``c``.

    ``c`` is first looked up by its integer value (so ``1``, ``"1"`` and
    ``1.0`` all resolve to the entry for ``1``), then as-is. Values with no
    entry in ``OUTCOME_LABELS`` are returned as ``str(c)``.
    """
    try:
        ci = int(c)
    except (TypeError, ValueError, OverflowError):
        # Not integer-like (non-numeric string, None, NaN, infinity, ...):
        # fall through to the direct lookup below.
        ci = None

    if ci is not None and ci in OUTCOME_LABELS:
        return OUTCOME_LABELS[ci]
    if c in OUTCOME_LABELS:
        return OUTCOME_LABELS[c]
    return str(c)
# Helper: class probabilities for one row, keyed by display label
def _probabilities_by_label(X):
    """Return ``{label: probability}`` for the first row of ``X``, or None.

    Probabilities of classes that map to the same display label are summed,
    and the result is ordered from most to least probable. None is returned
    when the predictor cannot supply probabilities or supplies none.
    """
    try:
        proba = PREDICTOR.predict_proba(X)
    except Exception as e:
        # Best effort: the caller falls back to the plain prediction.
        print(f"Error getting probabilities: {e}")
        return None

    if proba is None:
        return None
    if isinstance(proba, pandas.Series):
        proba = proba.to_frame().T
    if not isinstance(proba, pandas.DataFrame) or proba.empty:
        print("Probability DataFrame is empty or not a DataFrame.")
        return None

    totals = {}
    for cls, val in proba.iloc[0].items():
        key = _human_label(cls)
        totals[key] = totals.get(key, 0.0) + float(val)
    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))


# This function takes all of our features, encodes them, and performs a prediction
def do_predict(phone_hours, computer_hours, device_count, use_before_bed_label, sleep_time, sleep_hours):
    """Predict the outcome for one set of inputs.

    Args:
        phone_hours: Value for the ``phone_hours`` feature (converted to float).
        computer_hours: Value for the ``computer_hours`` feature (converted to float).
        device_count: Value for the ``device_count`` feature (converted to int).
        use_before_bed_label: One of ``LIKERT5_LABELS``; encoded via ``LIKERT5_MAP``.
        sleep_time: Value for the ``sleep_time`` feature (converted to float).
        sleep_hours: Value for the ``sleep_hours`` feature (converted to float).

    Returns:
        A dict mapping outcome labels to probabilities, most probable first.
        If probabilities are unavailable, the predicted label mapped to a
        nominal confidence of 1.0, so the caller always has a result to show.

    Raises:
        KeyError: If ``use_before_bed_label`` is not a known Likert label.
        TypeError, ValueError: If a numeric input cannot be converted.
    """
    # Note: sleep_quality is the target variable, not an input feature
    row = {
        FEATURE_COLS[0]: float(phone_hours),
        FEATURE_COLS[1]: float(computer_hours),
        FEATURE_COLS[2]: int(device_count),
        FEATURE_COLS[3]: int(LIKERT5_MAP[use_before_bed_label]),  # Likert answer -> code
        FEATURE_COLS[4]: float(sleep_time),
        FEATURE_COLS[5]: float(sleep_hours),
    }
    # Exclude the target column from the input frame
    X = pandas.DataFrame([row], columns=[col for col in FEATURE_COLS if col != TARGET_COL])

    proba_dict = _probabilities_by_label(X)
    if proba_dict:
        return proba_dict

    # No probabilities: report the plain prediction instead of returning nothing.
    pred_label = _human_label(PREDICTOR.predict(X).iloc[0])
    return {pred_label: 1.0}
# Representative examples; each row lists values in FEATURE_COLS order:
# phone_hours, computer_hours, device_count, use_before_bed, sleep_time, sleep_hours
EXAMPLES = [
    [2.5, 4.0, 3, "Sometimes", 23.0, 7.0],
    [1.0, 8.0, 5, "Very Often", 1.0, 5.0],
    [5.0, 2.0, 2, "Never", 22.5, 8.5],
    [0.5, 10.0, 4, "Often", 0.0, 6.0],
    [3.0, 3.0, 1, "Rarely", 23.5, 7.5],
]
# Gradio UI
with gradio.Blocks() as demo:
    # Page title and a short description of the app
    gradio.Markdown("# Sleep Quality Predictor")
    gradio.Markdown("""
This app predicts sleep quality based on device usage and sleep habits.
Adjust the inputs below to see the predicted sleep quality.
""")

    # Device usage inputs
    with gradio.Row():
        phone_hours = gradio.Slider(
            minimum=0, maximum=24, step=0.1, value=2.5, label=FEATURE_COLS[0]
        )
        computer_hours = gradio.Slider(
            minimum=0, maximum=24, step=0.1, value=4.0, label=FEATURE_COLS[1]
        )
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

    # Likert-scale question
    with gradio.Row():
        use_before_bed_label = gradio.Radio(
            choices=LIKERT5_LABELS, value="Sometimes", label=FEATURE_COLS[3]
        )

    # Sleep habit inputs
    with gradio.Row():
        sleep_time = gradio.Slider(
            minimum=0, maximum=24, step=0.1, value=23.0, label=FEATURE_COLS[4]
        )
        sleep_hours = gradio.Slider(
            minimum=0, maximum=12, step=0.1, value=7.0, label=FEATURE_COLS[5]
        )

    # Output: the two classes with their probabilities
    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Inputs to the do_predict function, in its argument order
    inputs = [
        phone_hours,
        computer_hours,
        device_count,
        use_before_bed_label,
        sleep_time,
        sleep_hours,
    ]

    # Re-run the prediction whenever any input value changes
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    # Clickable example rows that fill in the inputs
    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()