aedupuga committed on
Commit
0d4965f
·
verified ·
1 Parent(s): 62f73fa

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os # For filesystem operations
3
+ import shutil # For directory cleanup
4
+ import zipfile # For extracting model archives
5
+ import pathlib # For path manipulations
6
+ import pandas # For tabular data handling
7
+ import gradio # For interactive UI
8
+ import huggingface_hub # For downloading model assets
9
+ import autogluon.tabular # For loading and running AutoGluon predictors
10
+
11
# Settings: where the packaged predictor lives and where it is unpacked locally
MODEL_REPO_ID = "jennifee/classical_automl_model"  # Hub repo the archive is downloaded from
ZIP_FILENAME = "autogluon_predictor_dir.zip"  # Archive file requested from that repo
CACHE_DIR = pathlib.Path("hf_assets")  # Local folder receiving the downloaded archive
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native_sleep")  # Folder the archive is unpacked into

# Input columns, in the order the prediction row is assembled
FEATURE_COLS = [
    "phone_hours",
    "computer_hours",
    "device_count",
    "sleep_quality",
    "sleep_time",
    "sleep_hours",
]
# Name of the predicted column (assumed from earlier context; confirm against the training data)
TARGET_COL = "use_before_bed"

# Ordinal encoding for sleep quality: each label's code is its position in the tuple.
# NOTE: this is an example mapping and may need adjusting to the values the model was trained on.
SLEEP_QUALITY_MAP = {
    label: code
    for code, label in enumerate(("Poor", "Fair", "Good", "Excellent"))
}

# Display text for each outcome class (assumes binary classification of use_before_bed)
OUTCOME_LABELS = {
    0: "Does not use device before bed",
    1: "Uses device before bed",
}
37
+
38
+ # Download & load the native predictor
39
def _prepare_predictor_dir() -> str:
    """Download the predictor archive and unpack it into a clean directory.

    The archive ZIP_FILENAME is fetched from MODEL_REPO_ID into CACHE_DIR and
    extracted into EXTRACT_DIR, which is wiped first if it already exists.

    Returns:
        The directory to load the predictor from, as a string: the single
        top-level folder of the archive when the extraction produced exactly
        one directory, otherwise EXTRACT_DIR itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    archive_path = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Always extract into an empty directory so files from an earlier run
    # cannot mix with the fresh archive contents.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # An archive that wraps everything in one folder is loaded from that folder.
    entries = list(EXTRACT_DIR.iterdir())
    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)
56
+
57
# Fetch and unpack the model once at import time, then load it for inference.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
)
59
+
60
+ # A mapping utility that turns a raw class value into its human-readable outcome label
61
def _human_label(c):
    """Translate a raw class value into its human-readable outcome label.

    Args:
        c: Class value to translate, e.g. a predicted class or a probability
            column name.

    Returns:
        The OUTCOME_LABELS text for ``int(c)`` when that integer is a known
        class; otherwise the text stored under ``c`` itself; otherwise
        ``str(c)``.
    """
    # Try the integer interpretation first so values such as "1" or 1.0
    # resolve to the integer-keyed labels. Only the errors int() is documented
    # to raise are caught, so unrelated bugs are not hidden.
    try:
        code = int(c)
    except (TypeError, ValueError, OverflowError):
        code = None
    if code is not None and code in OUTCOME_LABELS:
        return OUTCOME_LABELS[code]

    # Direct lookup as a fallback; an unhashable value cannot be a key, so it
    # is rendered as plain text instead of raising.
    try:
        return OUTCOME_LABELS[c]
    except (KeyError, TypeError):
        return str(c)
71
+
72
+ # This function takes all of our features, encodes them accordingly, and performs a prediction
73
def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Run the predictor on one set of inputs and return class probabilities.

    Args:
        phone_hours: Value for the "phone_hours" feature; converted to float.
        computer_hours: Value for the "computer_hours" feature; converted to float.
        device_count: Value for the "device_count" feature; converted to int.
        sleep_quality_label: One of the SLEEP_QUALITY_MAP keys; encoded to its
            integer code.
        sleep_time: Value for the "sleep_time" feature; converted to int.
            The bundled examples use values such as 2200 -- presumably an
            HHMM clock time; confirm against the training data.
        sleep_hours: Value for the "sleep_hours" feature; converted to float.

    Returns:
        A dict mapping each human-readable outcome label to its probability,
        ordered from most to least likely. When the predictor cannot provide
        probabilities, a single-entry dict giving the predicted label a
        probability of 1.0. ``None`` when any input is ``None``.

    Raises:
        KeyError: If ``sleep_quality_label`` is not a key of SLEEP_QUALITY_MAP.
    """
    import logging  # local import so this function needs no extra file-level import

    raw_inputs = (
        phone_hours,
        computer_hours,
        device_count,
        sleep_quality_label,
        sleep_time,
        sleep_hours,
    )
    # A missing value (presumably an emptied input field -- confirm with the UI
    # toolkit) cannot be converted below, so report "no prediction" instead of
    # failing inside int()/float().
    if any(value is None for value in raw_inputs):
        return None

    # Values are listed in FEATURE_COLS order so zip() pairs each with its column.
    encoded = [
        float(phone_hours),
        float(computer_hours),
        int(device_count),
        SLEEP_QUALITY_MAP[sleep_quality_label],
        int(sleep_time),
        float(sleep_hours),
    ]
    X = pandas.DataFrame([dict(zip(FEATURE_COLS, encoded))], columns=FEATURE_COLS)

    # Probabilities are best-effort: a failure here is logged rather than
    # propagated, and the hard prediction is used instead.
    try:
        proba = PREDICTOR.predict_proba(X)
    except Exception:
        logging.getLogger(__name__).warning(
            "predict_proba failed; falling back to the hard prediction",
            exc_info=True,
        )
        proba = None

    if proba is None:
        # No probabilities available: report the predicted class with full weight.
        raw_pred = PREDICTOR.predict(X).iloc[0]
        return {_human_label(raw_pred): 1.0}

    # Normalise a Series result into a one-row frame so both shapes are read alike.
    if isinstance(proba, pandas.Series):
        proba = proba.to_frame().T

    # Several raw classes can share one display label, so their probabilities
    # are summed per label.
    totals = {}
    for cls, val in proba.iloc[0].items():
        label = _human_label(cls)
        totals[label] = totals.get(label, 0.0) + float(val)

    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))
124
+
125
# Representative example rows offered in the UI. Each row lists its values in
# the same order as the input widgets: phone hours, computer hours, device
# count, sleep quality label, sleep time, sleep hours.
# NOTE: these are placeholders; replace them with real rows from the dataset
# if available.
EXAMPLES = [
    [2.0, 3.0, 3, "Good", 2200, 8.0],
    [5.0, 6.0, 5, "Fair", 100, 6.0],
    [1.0, 1.0, 1, "Excellent", 2300, 9.0],
]
133
+
134
+ # Gradio UI for the sleep habits model
135
with gradio.Blocks() as demo:
    # Page title and a short description of what the app does
    gradio.Markdown("# Device Use Before Sleep Predictor")
    gradio.Markdown("""
    This app predicts whether a student uses their device before sleep based on their device usage and sleeping habits.
    """)

    # First row: device-usage inputs
    with gradio.Row():
        phone_hours = gradio.Slider(
            minimum=0, maximum=10, step=0.1, value=2.0, label=FEATURE_COLS[0]
        )
        computer_hours = gradio.Slider(
            minimum=0, maximum=10, step=0.1, value=3.0, label=FEATURE_COLS[1]
        )
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

    # Second row: sleep-related inputs
    with gradio.Row():
        sleep_quality_label = gradio.Radio(
            choices=list(SLEEP_QUALITY_MAP),
            value="Good",
            label=FEATURE_COLS[3],
        )
        sleep_time = gradio.Number(value=2200, precision=0, label=FEATURE_COLS[4])
        sleep_hours = gradio.Slider(
            minimum=0, maximum=12, step=0.1, value=8.0, label=FEATURE_COLS[5]
        )

    # Output: the two most likely classes (binary classification is assumed)
    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Re-run the prediction whenever any single input changes. The list order
    # matches the parameter order of do_predict.
    inputs = [
        phone_hours,
        computer_hours,
        device_count,
        sleep_quality_label,
        sleep_time,
        sleep_hours,
    ]
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    # Clickable example rows that populate the input widgets
    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=3,
        cache_examples=False,
    )

# Start the web app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()