Spaces:
Sleeping
Sleeping
import os  # For filesystem operations
import pathlib  # For path manipulations
import shutil  # For directory cleanup
import traceback  # For printing stack traces when a prediction fails
import zipfile  # For extracting model archives

import autogluon.tabular  # For loading and running AutoGluon predictors
import gradio  # For interactive UI
import huggingface_hub  # For downloading model assets
import pandas  # For tabular data handling
import sklearn  # Import sklearn to check version
# Settings
# Hub repository and archive name handed to hf_hub_download.
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Local working directories: the download target and the extraction target.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native_sleep")

# Feature columns, in the order used to build the prediction row and to label
# the UI inputs.
FEATURE_COLS = [
    # device usage
    "phone_hours", "computer_hours", "device_count",
    # sleep habits
    "sleep_quality", "sleep_time", "sleep_hours",
]

# Presumably the column the model was trained to predict; it is not referenced
# anywhere else in this file -- TODO confirm against the training data.
TARGET_COL = "use_before_bed"

# Radio-button label -> numeric code for the sleep_quality feature.
# NOTE(review): this ordinal encoding is an assumption; verify it matches the
# values the model saw during training.
SLEEP_QUALITY_MAP = {
    label: code
    for code, label in enumerate(("Poor", "Fair", "Good", "Excellent"))
}

# Raw class value -> text shown to the user (assumes a binary 0/1 target).
OUTCOME_LABELS = dict(
    enumerate(("Does not use device before bed", "Uses device before bed"))
)
# Download & load the native predictor
def _prepare_predictor_dir() -> str:
    """Download the predictor archive and unpack it into a clean directory.

    The archive ``ZIP_FILENAME`` is fetched from ``MODEL_REPO_ID`` into
    ``CACHE_DIR``. ``EXTRACT_DIR`` is deleted if it already exists, recreated,
    and the archive is extracted into it.

    Returns:
        The path to load the predictor from, as a string: the single top-level
        directory when the extraction produced exactly one entry and that
        entry is a directory, otherwise ``EXTRACT_DIR`` itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    archive_path = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Start from an empty extraction directory so nothing from a previous
    # extraction is left behind.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # An archive that wraps everything in one folder: descend into that folder.
    top_level = list(EXTRACT_DIR.iterdir())
    if len(top_level) == 1 and top_level[0].is_dir():
        return str(top_level[0])
    return str(EXTRACT_DIR)
# Runs at import time: fetch/unpack the archive, then load the predictor once
# so do_predict can use the module-level PREDICTOR on every call.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
)
# A mapping utility that turns raw class values into display labels
def _human_label(c):
    """Translate a raw class value into its human-readable label.

    Args:
        c: A class value, e.g. a predicted label or a probability column name
            (such as ``0``, ``1`` or ``"1"``).

    Returns:
        The ``OUTCOME_LABELS`` entry for ``int(c)`` when ``c`` converts to a
        known integer code, else the entry for ``c`` itself when it is a key,
        else ``str(c)``.
    """
    try:
        code = int(c)
    except (TypeError, ValueError, OverflowError):
        # Not integer-like (None, arbitrary text, NaN/inf, ...): try c as-is.
        code = None

    if code is not None and code in OUTCOME_LABELS:
        return OUTCOME_LABELS[code]

    try:
        return OUTCOME_LABELS[c]
    except (KeyError, TypeError):
        # KeyError: unknown class value; TypeError: unhashable value.
        return str(c)
def _encode_inputs(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Build the one-row feature frame (columns = FEATURE_COLS) for the predictor."""
    values = [
        float(phone_hours),
        float(computer_hours),
        int(device_count),
        SLEEP_QUALITY_MAP[sleep_quality_label],  # radio label -> numeric code
        int(sleep_time),
        float(sleep_hours),
    ]
    return pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)


def _class_probabilities(X):
    """Return ``{display label: probability}`` sorted high-to-low, or ``None`` if unavailable."""
    try:
        proba = PREDICTOR.predict_proba(X)
    except Exception as e:
        # Best effort: the caller can still report the predicted label alone.
        print(f"Error getting prediction probabilities: {e}")
        return None

    if isinstance(proba, pandas.Series):
        proba = proba.to_frame().T
    print("Prediction probabilities (proba):")
    print(proba)

    # Several raw classes may map to the same display label; sum their shares.
    totals = {}
    for cls, val in proba.iloc[0].items():
        key = _human_label(cls)
        totals[key] = totals.get(key, 0.0) + float(val)
    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))


# This function takes all of our features, encodes them, and performs a prediction
def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Run the predictor on one set of UI inputs and return the class probabilities.

    Inputs and intermediate results are printed for debugging.

    Args:
        phone_hours: Converted with ``float``.
        computer_hours: Converted with ``float``.
        device_count: Converted with ``int``.
        sleep_quality_label: A key of ``SLEEP_QUALITY_MAP``.
        sleep_time: Converted with ``int``. Presumably a 24-hour HHMM value
            (the examples use 2200, 100, 2300) -- TODO confirm.
        sleep_hours: Converted with ``float``.

    Returns:
        A dict mapping each display label to its probability, ordered from
        most to least likely. If probabilities cannot be computed, the
        predicted display label alone (a ``str``, which ``gradio.Label`` also
        accepts). ``None`` if encoding or prediction fails; the error and
        traceback are printed.
    """
    print("Received inputs:")
    print(f" phone_hours: {phone_hours}")
    print(f" computer_hours: {computer_hours}")
    print(f" device_count: {device_count}")
    print(f" sleep_quality_label: {sleep_quality_label}")
    print(f" sleep_time: {sleep_time}")
    print(f" sleep_hours: {sleep_hours}")
    print(f" sklearn version: {sklearn.__version__}")  # Print sklearn version
    try:
        X = _encode_inputs(
            phone_hours, computer_hours, device_count,
            sleep_quality_label, sleep_time, sleep_hours,
        )
        print("Input DataFrame (X):")
        print(X)

        pred_series = PREDICTOR.predict(X)
        print("Raw prediction (pred_series):")
        print(pred_series)
        pred_label = _human_label(pred_series.iloc[0])

        proba_dict = _class_probabilities(X)
        if proba_dict is not None:
            print("Probability dictionary (proba_dict):")
            print(proba_dict)

        md = f"**Prediction:** {pred_label}"
        if proba_dict:
            md += f" \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"
        print("Markdown output (md):")
        print(md)

        # Without probabilities, still surface the prediction instead of
        # clearing the output component.
        return proba_dict if proba_dict else pred_label
    except Exception as e:
        # UI callback boundary: log and return None rather than raising.
        print(f"An error occurred during prediction: {e}")
        traceback.print_exc()
        return None
# Representative examples for the UI. These are placeholders and should be
# replaced with real rows from the dataset if available.
# Column order matches the `inputs` list given to gradio.Examples:
# phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours
EXAMPLES = [
    list(example)
    for example in (
        (2.0, 3.0, 3, "Good", 2200, 8.0),
        (5.0, 6.0, 5, "Fair", 100, 6.0),
        (1.0, 1.0, 1, "Excellent", 2300, 9.0),
    )
]
# Gradio UI for the sleep habits model
with gradio.Blocks() as demo:
    # Title followed by a one-sentence description of the app
    gradio.Markdown("# Device Use Before Sleep Predictor")
    gradio.Markdown("""
This app predicts whether a student uses their device before sleep based on their device usage and sleeping habits.
""")

    # First row: device-usage inputs
    with gradio.Row():
        phone_hours = gradio.Slider(
            minimum=0, maximum=10, step=0.1, value=2.0, label=FEATURE_COLS[0]
        )
        computer_hours = gradio.Slider(
            minimum=0, maximum=10, step=0.1, value=3.0, label=FEATURE_COLS[1]
        )
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

    # Second row: sleep-habit inputs
    with gradio.Row():
        sleep_quality_label = gradio.Radio(
            choices=list(SLEEP_QUALITY_MAP),
            value="Good",
            label=FEATURE_COLS[3],
        )
        sleep_time = gradio.Number(value=2200, precision=0, label=FEATURE_COLS[4])
        sleep_hours = gradio.Slider(
            minimum=0, maximum=12, step=0.1, value=8.0, label=FEATURE_COLS[5]
        )

    # Output component; two classes are shown (assumes binary classification)
    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Order must match do_predict's parameters and the columns of EXAMPLES
    inputs = [
        phone_hours,
        computer_hours,
        device_count,
        sleep_quality_label,
        sleep_time,
        sleep_hours,
    ]

    # Re-run the prediction whenever any single input changes
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=3,
        cache_examples=False,
    )

# Launch the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()