bcueva committed on
Commit
0b3293e
·
verified ·
1 Parent(s): bc5f799

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import shutil
import zipfile
import pathlib

import pandas
import gradio
import huggingface_hub
import autogluon.tabular

# Settings
# Hub repository and the archive inside it that is downloaded at startup.
MODEL_REPO_ID = "jennifee/classical_automl_model"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
# Local working directories: the archive is downloaded into CACHE_DIR and
# unpacked under EXTRACT_DIR.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"

# Feature column names and target column name (these should match the model's expected input)
FEATURE_COLS_MODEL = [
    "phone_hours",
    "computer_hours",
    "device_count",
    "sleep_quality",
    "sleep_time",
    "sleep_hours",
]
TARGET_COL = "use_before_bed"

# Choices offered in the Gradio interface for the categorical feature.
# The model expects the original string/numeric values based on the training data,
# so these strings are passed through unchanged.
SLEEP_QUALITY_LABELS = ["good", "medium", "bad"]

# Encoding for the target outcome (for displaying the prediction and probability)
USE_BEFORE_BED_LABELS = {
    0: "No",
    1: "Yes",
}


# Download & load the native predictor
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor from the Hub and unpack it locally.

    Fetches ZIP_FILENAME from MODEL_REPO_ID into CACHE_DIR, removes any
    earlier extraction under EXTRACT_DIR, and unpacks the archive there.

    Returns:
        The predictor root as a string: the archive's single top-level
        folder when the extraction produced exactly one entry and that entry
        is a directory, otherwise EXTRACT_DIR itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Start from an empty directory so files from a previous archive never linger.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # An archive may wrap everything in one folder; if so, that folder is the root.
    contents = list(EXTRACT_DIR.iterdir())
    if len(contents) == 1 and contents[0].is_dir():
        return str(contents[0])
    return str(EXTRACT_DIR)


# Download, unpack and load the predictor once at import time; do_predict
# reuses this module-level instance for every call.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)


60
+ # A mapping utility to make it easier to encode the variables for display
61
+ def _human_label(c):
62
+ # Updated to map 0/1 to "No"/"Yes" for the target
63
+ try:
64
+ ci = int(c)
65
+ if ci in USE_BEFORE_BED_LABELS:
66
+ return USE_BEFORE_BED_LABELS[ci]
67
+ except Exception:
68
+ pass
69
+ return str(c)
70
+
# This function takes all of our features, encodes them accordingly, and performs a prediction
def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
    """Run the predictor on one set of inputs and format the result.

    Args:
        phone_hours: value for the "phone_hours" column (converted to float).
        computer_hours: value for the "computer_hours" column (converted to float).
        device_count: value for the "device_count" column (converted to int).
        sleep_quality_label: value for the "sleep_quality" column, passed through unchanged.
        sleep_time: value for the "sleep_time" column (converted to int).
        sleep_hours: value for the "sleep_hours" column (converted to float).

    Returns:
        A ``(proba_dict, md)`` pair. ``proba_dict`` maps display labels to
        probabilities, sorted from most to least likely, or is None when no
        probabilities are available. ``md`` is a Markdown string with the
        predicted label and, when probabilities exist, its confidence.
    """
    # A cleared input field is delivered as None; report that instead of
    # letting float()/int() raise on every change event.
    values = (phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours)
    if any(v is None for v in values):
        return None, "**Prediction:** unavailable - please fill in every input."

    # Create the input row using the original column names and appropriate data types
    row = {
        "phone_hours": float(phone_hours),
        "computer_hours": float(computer_hours),
        "device_count": int(device_count),
        "sleep_quality": sleep_quality_label,
        "sleep_time": int(sleep_time),
        "sleep_hours": float(sleep_hours),
    }
    features = pandas.DataFrame([row], columns=FEATURE_COLS_MODEL)

    raw_pred = PREDICTOR.predict(features).iloc[0]
    pred_label = _human_label(raw_pred)

    try:
        proba = PREDICTOR.predict_proba(features)
        if isinstance(proba, pandas.Series):
            # Normalise a Series result to a one-row frame so .iloc[0] below yields a row.
            proba = proba.to_frame().T
    except Exception as e:
        # Probabilities are best effort: the prediction itself is still shown without them.
        print(f"Error calculating probabilities: {e}")
        proba = None

    proba_dict = None
    if proba is not None:
        row0 = proba.iloc[0]
        tmp = {}
        # Assuming the probability columns are the class labels (0 and 1)
        for cls in [0, 1]:
            if cls in row0:
                key = _human_label(cls)
                tmp[key] = float(row0[cls]) + float(tmp.get(key, 0.0))
        proba_dict = dict(sorted(tmp.items(), key=lambda kv: kv[1], reverse=True))

    md = f"**Prediction:** {pred_label}"
    if proba_dict:
        # The line break must be an escape: a raw newline inside a
        # single-quoted f-string is a syntax error.
        md += f"\n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"

    # Always return two values, even if proba_dict is None
    return proba_dict, md


# Representative examples - Updated examples based on the new dataset
# Each row lists its values in FEATURE_COLS_MODEL order:
# phone_hours, computer_hours, device_count, sleep_quality, sleep_time, sleep_hours
EXAMPLES = [
    [3.5, 5.0, 3, "good", 23, 7.0],
    [4.2, 6.5, 3, "medium", 0, 6.5],
    [5.0, 4.0, 4, "bad", 1, 6.0],
    [2.0, 7.5, 3, "good", 22, 7.5],
    [3.8, 6.0, 3, "medium", 0, 6.0],
    [4.5, 5.5, 3, "good", 1, 7.0],
]

# Gradio UI
with gradio.Blocks() as demo:
    # Provide an introduction
    gradio.Markdown("# Predict Phone Use Before Bed")
    gradio.Markdown("""
    This app predicts whether a student uses their phone before bed based on their sleeping habits.
    Enter the student's sleeping habits below to get a prediction.
    """)

    # Input widgets, labelled with the model's feature column names
    with gradio.Row():
        phone_hours = gradio.Number(value=3.5, precision=1, label=FEATURE_COLS_MODEL[0])
        computer_hours = gradio.Number(value=5.0, precision=1, label=FEATURE_COLS_MODEL[1])
        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS_MODEL[2])

    with gradio.Row():
        sleep_quality_label = gradio.Radio(choices=SLEEP_QUALITY_LABELS, value="good", label=FEATURE_COLS_MODEL[3])
        sleep_time = gradio.Number(value=23, precision=0, label=FEATURE_COLS_MODEL[4])
        sleep_hours = gradio.Number(value=7.0, precision=1, label=FEATURE_COLS_MODEL[5])

    # Label for the probability output and Markdown for the prediction text
    proba_pretty = gradio.Label(num_top_classes=2, label="Probability of Using Phone Before Bed")
    prediction_output = gradio.Markdown(label="Prediction")

    # Inputs are listed in the same order as the do_predict parameters
    inputs = [phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours]
    # do_predict returns two values (probability dict or None, Markdown text),
    # one for each output component below
    outputs = [proba_pretty, prediction_output]
    # Re-run the prediction whenever any input changes
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=outputs)

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=False)