Spaces:
Sleeping
Sleeping
File size: 7,194 Bytes
29a78de 87679cb 0d4965f 29a78de 0d4965f 29a78de 0d4965f 29a78de 0d4965f 29a78de 0d4965f 29a78de 0d4965f 87679cb 0d4965f 29a78de 0d4965f 87679cb da8f203 87679cb 0d4965f 87679cb 0d4965f 29a78de 0d4965f da8f203 0d4965f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import os # For filesystem operations
import shutil # For directory cleanup
import zipfile # For extracting model archives
import pathlib # For path manipulations
import pandas # For tabular data handling
import gradio # For interactive UI
import huggingface_hub # For downloading model assets
import autogluon.tabular # For loading and running AutoGluon predictors
import sklearn # Import sklearn to check version
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
# Where the zipped predictor lives on the Hub and where it is stored locally.
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"  # assumed to be unchanged from the earlier model
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native_sleep")

# Feature columns, in the order used when building the model input row.
FEATURE_COLS = [
    "phone_hours", "computer_hours", "device_count",
    "sleep_quality", "sleep_time", "sleep_hours",
]

# Target column (assumed from previous context).
TARGET_COL = "use_before_bed"

# Integer encoding for the sleep-quality labels. This mapping is an example and
# may need adjustment to match the values actually present in the dataset.
# The labels are numbered 0..3 in the order listed here.
SLEEP_QUALITY_MAP = {
    label: code
    for code, label in enumerate(("Poor", "Fair", "Good", "Excellent"))
}

# Display text for each outcome class (assumes binary classification of
# use_before_bed); the classes are numbered 0 and 1 in the order listed.
OUTCOME_LABELS = dict(enumerate((
    "Does not use device before bed",
    "Uses device before bed",
)))
# Download & load the native predictor
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor and unpack it into a fresh directory.

    The archive ``ZIP_FILENAME`` is fetched from the ``MODEL_REPO_ID`` model
    repository into ``CACHE_DIR``. ``EXTRACT_DIR`` is deleted if it already
    exists, recreated, and the archive is extracted into it.

    Returns:
        The path (as a string) of the directory to load the predictor from:
        the single top-level directory of the extracted archive when the
        archive unpacks to exactly one directory, otherwise ``EXTRACT_DIR``
        itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # NOTE(review): recent huggingface_hub releases appear to document
    # `local_dir_use_symlinks` as deprecated -- confirm whether it is still needed.
    archive_path = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Always extract into an empty directory so stale files never linger.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # An archive that wraps everything in one folder unpacks to a single
    # directory entry; in that case the predictor lives one level down.
    entries = list(EXTRACT_DIR.iterdir())
    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)
# Import-time side effect: download/extract the model archive and load the
# predictor once, so every prediction request reuses the same instance.
PREDICTOR_DIR = _prepare_predictor_dir()
# NOTE(review): require_py_version_match=False presumably lets the predictor load
# under a Python version different from the one it was saved with -- confirm
# against the AutoGluon documentation.
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)
# Mapping utility: convert a raw class value into its human-readable outcome label
def _human_label(c, labels=None):
    """Translate a raw class value into its human-readable outcome label.

    Args:
        c: The class value to translate (for example an int, a float, or a
            numeric string).
        labels: Optional mapping from class value to display text. Defaults
            to the module-level ``OUTCOME_LABELS``.

    Returns:
        ``labels[int(c)]`` when ``c`` converts to an integer key of ``labels``
        without losing a fractional part; otherwise ``labels[c]`` when ``c``
        itself is a key; otherwise ``str(c)``.
    """
    if labels is None:
        labels = OUTCOME_LABELS

    try:
        ci = int(c)
        # int() truncates (int(0.7) == 0); only trust the integer form when
        # the value really is integral so 0.7 is not reported as class 0.
        is_exact = float(c) == ci
    except (TypeError, ValueError, OverflowError):
        # Not convertible (None, "abc", NaN, inf, ...): fall through to the
        # direct lookup below.
        ci, is_exact = None, False

    if is_exact and ci in labels:
        return labels[ci]

    try:
        if c in labels:
            return labels[c]
    except TypeError:
        # Unhashable values (lists, dicts, ...) cannot be keys; show them as text.
        pass
    return str(c)
# This function takes all of our features, encodes them accordingly, and performs a prediction
def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Run the predictor on one set of UI inputs and return data for the label output.

    The inputs and every intermediate result are printed for debugging.

    Args:
        phone_hours: Value for the ``phone_hours`` feature; converted with ``float``.
        computer_hours: Value for the ``computer_hours`` feature; converted with ``float``.
        device_count: Value for the ``device_count`` feature; converted with ``int``.
        sleep_quality_label: A key of ``SLEEP_QUALITY_MAP``; encoded to its integer code.
        sleep_time: Value for the ``sleep_time`` feature; converted with ``int``
            (presumably a 24-hour HHMM clock value such as 2200 -- confirm
            against the training data).
        sleep_hours: Value for the ``sleep_hours`` feature; converted with ``float``.

    Returns:
        A dict mapping human-readable outcome labels to probabilities, sorted
        by descending probability. If the prediction succeeded but no
        probabilities could be obtained, the predicted label as a plain string
        (so the prediction is not lost). ``None`` if encoding the inputs or the
        prediction itself failed; the error is printed with its traceback.
    """
    print("Received inputs:")
    print(f" phone_hours: {phone_hours}")
    print(f" computer_hours: {computer_hours}")
    print(f" device_count: {device_count}")
    print(f" sleep_quality_label: {sleep_quality_label}")
    print(f" sleep_time: {sleep_time}")
    print(f" sleep_hours: {sleep_hours}")
    print(f" sklearn version: {sklearn.__version__}")  # Print sklearn version

    try:
        # Encode the categorical label with the integer code from SLEEP_QUALITY_MAP.
        sleep_quality_code = SLEEP_QUALITY_MAP[sleep_quality_label]

        # Values are listed in FEATURE_COLS order.
        values = (
            float(phone_hours),
            float(computer_hours),
            int(device_count),
            sleep_quality_code,
            int(sleep_time),
            float(sleep_hours),
        )
        X = pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)
        print("Input DataFrame (X):")
        print(X)

        pred_series = PREDICTOR.predict(X)
        raw_pred = pred_series.iloc[0]
        print("Raw prediction (pred_series):")
        print(pred_series)
        pred_label = _human_label(raw_pred)

        # Probabilities are best effort: a failure here must not discard the
        # prediction that was already computed above.
        try:
            proba = PREDICTOR.predict_proba(X)
            if isinstance(proba, pandas.Series):
                proba = proba.to_frame().T
            print("Prediction probabilities (proba):")
            print(proba)
        except Exception as e:
            proba = None
            print(f"Error getting prediction probabilities: {e}")

        proba_dict = None
        if proba is not None:
            # Sum probabilities of classes that share the same display label.
            merged = {}
            for cls, val in proba.iloc[0].items():
                key = _human_label(cls)
                merged[key] = merged.get(key, 0.0) + float(val)
            proba_dict = dict(sorted(merged.items(), key=lambda kv: kv[1], reverse=True))
        print("Probability dictionary (proba_dict):")
        print(proba_dict)

        md = f"**Prediction:** {pred_label}"
        if proba_dict:
            md += f" \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"
        print("Markdown output (md):")
        print(md)

        if proba_dict:
            return proba_dict
        # No probabilities available: return just the class name, which the
        # Gradio Label component accepts as a plain string.
        return pred_label
    except Exception as e:
        # UI callback boundary: report the failure and clear the output
        # instead of propagating the exception.
        import traceback

        print(f"An error occurred during prediction: {e}")
        traceback.print_exc()
        return None
# Representative examples shown in the UI.
# These are placeholders: they will need to be updated based on the new model's
# features and should be replaced with actual rows from the dataset if available.
# Each row is ordered like the UI inputs:
# phone_hours, computer_hours, device_count, sleep quality label, sleep_time, sleep_hours
EXAMPLES = [
    [2.0, 3.0, 3, "Good", 2200, 8.0],
    [5.0, 6.0, 5, "Fair", 100, 6.0],
    [1.0, 1.0, 1, "Excellent", 2300, 9.0],
]
# Gradio UI for the sleep habits model
with gradio.Blocks() as demo:
    # Page title and a one-paragraph introduction.
    gradio.Markdown("# Device Use Before Sleep Predictor")
    gradio.Markdown(
        "\nThis app predicts whether a student uses their device before sleep based on their device usage and sleeping habits.\n"
    )

    # Row 1: device-usage features.
    with gradio.Row():
        phone_hours = gradio.Slider(
            minimum=0, maximum=10, value=2.0, step=0.1, label=FEATURE_COLS[0],
        )
        computer_hours = gradio.Slider(
            minimum=0, maximum=10, value=3.0, step=0.1, label=FEATURE_COLS[1],
        )
        device_count = gradio.Number(label=FEATURE_COLS[2], value=3, precision=0)

    # Row 2: sleep-related features.
    with gradio.Row():
        sleep_quality_label = gradio.Radio(
            choices=list(SLEEP_QUALITY_MAP),
            value="Good",
            label=FEATURE_COLS[3],
        )
        sleep_time = gradio.Number(label=FEATURE_COLS[4], value=2200, precision=0)
        sleep_hours = gradio.Slider(
            minimum=0, maximum=12, value=8.0, step=0.1, label=FEATURE_COLS[5],
        )

    # Output: top two class probabilities (assumes binary classification).
    proba_pretty = gradio.Label(label="Class probabilities", num_top_classes=2)

    # Re-run the prediction whenever any input changes. The list order must
    # match the positional parameters of do_predict and the EXAMPLES rows.
    inputs = [
        phone_hours,
        computer_hours,
        device_count,
        sleep_quality_label,
        sleep_time,
        sleep_hours,
    ]
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    # Clickable example rows that fill the inputs; results are not cached.
    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=3,
        cache_examples=False,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|