bcueva committed on
Commit
2c32c76
·
verified ·
1 Parent(s): 4b8609a

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +24 -66
app.py CHANGED
@@ -1,41 +1,19 @@
1
- import os
2
- import shutil
3
- import zipfile
4
- import pathlib
5
  import pandas
6
  import gradio
7
  import huggingface_hub
8
  import autogluon.tabular
9
 
10
- # Settings
11
  MODEL_REPO_ID = "jennifee/classical_automl_model"
12
  ZIP_FILENAME = "autogluon_predictor_dir.zip"
13
  CACHE_DIR = pathlib.Path("hf_assets")
14
  EXTRACT_DIR = CACHE_DIR / "predictor_native"
15
 
16
- # Feature column names and target column names (these should match the model's expected input)
17
- FEATURE_COLS_MODEL = [
18
- "phone_hours",
19
- "computer_hours",
20
- "device_count",
21
- "sleep_quality",
22
- "sleep_time",
23
- "sleep_hours",
24
- ]
25
  TARGET_COL = "use_before_bed"
 
 
26
 
27
- # Encoding for categorical features - these mappings are for the Gradio interface,
28
- # the model expects the original string/numeric values based on the training data.
29
- SLEEP_QUALITY_LABELS = ["good", "medium", "bad"]
30
-
31
- # Encoding for the target outcome (for displaying the prediction and probability)
32
- USE_BEFORE_BED_LABELS = {
33
- 0: "No",
34
- 1: "Yes",
35
- }
36
-
37
-
38
- # Download & load the native predictor
39
  def _prepare_predictor_dir() -> str:
40
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
41
  local_zip = huggingface_hub.hf_hub_download(
@@ -57,9 +35,7 @@ def _prepare_predictor_dir() -> str:
57
  PREDICTOR_DIR = _prepare_predictor_dir()
58
  PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)
59
 
60
- # A mapping utility to make it easier to encode the variables for display
61
  def _human_label(c):
62
- # Updated to map 0/1 to "No"/"Yes" for the target
63
  try:
64
  ci = int(c)
65
  if ci in USE_BEFORE_BED_LABELS:
@@ -68,25 +44,23 @@ def _human_label(c):
68
  pass
69
  return str(c)
70
 
71
- # This functions takes all of our features, encodes this accordingly, and performs a prediction
72
  def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
73
- # Create the input row using the original column names and appropriate data types
74
  row = {
75
  "phone_hours": float(phone_hours),
76
  "computer_hours": float(computer_hours),
77
  "device_count": int(device_count),
78
- "sleep_quality": sleep_quality_label, # Pass the label directly
79
  "sleep_time": int(sleep_time),
80
  "sleep_hours": float(sleep_hours),
81
  }
82
  X = pandas.DataFrame([row], columns=FEATURE_COLS_MODEL)
83
 
84
-
85
  pred_series = PREDICTOR.predict(X)
86
  raw_pred = pred_series.iloc[0]
 
87
 
 
88
  try:
89
- # Re-added probability prediction
90
  proba = PREDICTOR.predict_proba(X)
91
  if isinstance(proba, pandas.Series):
92
  proba = proba.to_frame().T
@@ -94,48 +68,36 @@ def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, s
94
  print(f"Error calculating probabilities: {e}")
95
  proba = None
96
 
97
- pred_label = _human_label(raw_pred)
98
-
99
  proba_dict = None
100
  if proba is not None:
101
  row0 = proba.iloc[0]
102
  tmp = {}
103
- # Assuming the probability columns are the class labels (0 and 1)
104
  for cls in [0, 1]:
105
- if cls in row0:
106
- key = _human_label(cls)
107
- tmp[key] = float(row0[cls]) + float(tmp.get(key, 0.0))
108
- proba_dict = dict(sorted(tmp.items(), key=lambda kv: kv[1], reverse=True))
109
-
 
 
 
 
 
110
 
111
- # Updated output format to include confidence
112
  md = f"**Prediction:** {pred_label}"
113
  if proba_dict:
114
- # Corrected the f-string syntax to be on a single line within the multiline string
115
  md += f" \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"
116
 
117
- # Always return two values, even if proba_dict is None
118
  return proba_dict, md
119
 
 
120
 
121
- # Representative examples - Updated examples based on the new dataset
122
- EXAMPLES = [
123
- [3.5, 5.0, 3, "good", 23, 7.0],
124
- [4.2, 6.5, 3, "medium", 0, 6.5],
125
- [5.0, 4.0, 4, "bad", 1, 6.0],
126
- [2.0, 7.5, 3, "good", 22, 7.5],
127
- [3.8, 6.0, 3, "medium", 0, 6.0],
128
- [4.5, 5.5, 3, "good", 1, 7.0],
129
- ]
130
-
131
- # Gradio UI
132
  with gradio.Blocks() as demo:
133
- # Provide an introduction
134
  gradio.Markdown("# Predict Phone Use Before Bed")
135
- gradio.Markdown("""
136
- This app predicts whether a student uses their phone before bed based on their sleeping habits.
137
- Enter the student's sleeping habits below to get a prediction.
138
- """)
139
 
140
  with gradio.Row():
141
  phone_hours = gradio.Number(value=3.5, precision=1, label=FEATURE_COLS_MODEL[0])
@@ -147,14 +109,10 @@ Enter the student's sleeping habits below to get a prediction.
147
  sleep_time = gradio.Number(value=23, precision=0, label=FEATURE_COLS_MODEL[4])
148
  sleep_hours = gradio.Number(value=7.0, precision=1, label=FEATURE_COLS_MODEL[5])
149
 
150
-
151
- # Re-added Label for probability output and kept Markdown for prediction text
152
  proba_pretty = gradio.Label(num_top_classes=2, label="Probability of Using Phone Before Bed")
153
- prediction_output = gradio.Markdown(label="Prediction")
154
 
155
- # Updated inputs list and outputs list to match the do_predict function
156
  inputs = [phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours]
157
- # The do_predict function now returns either a dict or a string, update outputs accordingly
158
  outputs = [proba_pretty, prediction_output]
159
  for comp in inputs:
160
  comp.change(fn=do_predict, inputs=inputs, outputs=outputs)
@@ -168,4 +126,4 @@ Enter the student's sleeping habits below to get a prediction.
168
  )
169
 
170
  if __name__ == "__main__":
171
- demo.launch(debug=False) # Setting debug back to False for Space
 
1
+ import pathlib, shutil, zipfile
 
 
 
2
  import pandas
3
  import gradio
4
  import huggingface_hub
5
  import autogluon.tabular
6
 
 
7
  MODEL_REPO_ID = "jennifee/classical_automl_model"
8
  ZIP_FILENAME = "autogluon_predictor_dir.zip"
9
  CACHE_DIR = pathlib.Path("hf_assets")
10
  EXTRACT_DIR = CACHE_DIR / "predictor_native"
11
 
12
+ FEATURE_COLS_MODEL = ['phone_hours', 'computer_hours', 'device_count', 'sleep_quality', 'sleep_time', 'sleep_hours']
 
 
 
 
 
 
 
 
13
  TARGET_COL = "use_before_bed"
14
+ SLEEP_QUALITY_LABELS = ['good', 'medium', 'bad']
15
+ USE_BEFORE_BED_LABELS = {0: 'No', 1: 'Yes'}
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def _prepare_predictor_dir() -> str:
18
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
19
  local_zip = huggingface_hub.hf_hub_download(
 
35
  PREDICTOR_DIR = _prepare_predictor_dir()
36
  PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)
37
 
 
38
  def _human_label(c):
 
39
  try:
40
  ci = int(c)
41
  if ci in USE_BEFORE_BED_LABELS:
 
44
  pass
45
  return str(c)
46
 
 
47
  def do_predict(phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours):
 
48
  row = {
49
  "phone_hours": float(phone_hours),
50
  "computer_hours": float(computer_hours),
51
  "device_count": int(device_count),
52
+ "sleep_quality": sleep_quality_label,
53
  "sleep_time": int(sleep_time),
54
  "sleep_hours": float(sleep_hours),
55
  }
56
  X = pandas.DataFrame([row], columns=FEATURE_COLS_MODEL)
57
 
 
58
  pred_series = PREDICTOR.predict(X)
59
  raw_pred = pred_series.iloc[0]
60
+ pred_label = _human_label(raw_pred)
61
 
62
+ proba = None
63
  try:
 
64
  proba = PREDICTOR.predict_proba(X)
65
  if isinstance(proba, pandas.Series):
66
  proba = proba.to_frame().T
 
68
  print(f"Error calculating probabilities: {e}")
69
  proba = None
70
 
 
 
71
  proba_dict = None
72
  if proba is not None:
73
  row0 = proba.iloc[0]
74
  tmp = {}
 
75
  for cls in [0, 1]:
76
+ val = None
77
+ if cls in row0.index:
78
+ val = row0[cls]
79
+ elif str(cls) in row0.index:
80
+ val = row0[str(cls)]
81
+ if val is not None:
82
+ key = _human_label(cls)
83
+ tmp[key] = float(tmp.get(key, 0.0)) + float(val)
84
+ if tmp:
85
+ proba_dict = dict(sorted(tmp.items(), key=lambda kv: kv[1], reverse=True))
86
 
 
87
  md = f"**Prediction:** {pred_label}"
88
  if proba_dict:
 
89
  md += f" \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"
90
 
 
91
  return proba_dict, md
92
 
93
+ EXAMPLES = [[3.5, 5.0, 3, 'good', 23, 7.0], [4.2, 6.5, 3, 'medium', 0, 6.5], [5.0, 4.0, 4, 'bad', 1, 6.0], [2.0, 7.5, 3, 'good', 22, 7.5], [3.8, 6.0, 3, 'medium', 0, 6.0], [4.5, 5.5, 3, 'good', 1, 7.0]]
94
 
 
 
 
 
 
 
 
 
 
 
 
95
  with gradio.Blocks() as demo:
 
96
  gradio.Markdown("# Predict Phone Use Before Bed")
97
+ gradio.Markdown(
98
+ "This app predicts whether a student uses their phone before bed based on their sleeping habits."
99
+ "\nEnter the student's sleeping habits below to get a prediction."
100
+ )
101
 
102
  with gradio.Row():
103
  phone_hours = gradio.Number(value=3.5, precision=1, label=FEATURE_COLS_MODEL[0])
 
109
  sleep_time = gradio.Number(value=23, precision=0, label=FEATURE_COLS_MODEL[4])
110
  sleep_hours = gradio.Number(value=7.0, precision=1, label=FEATURE_COLS_MODEL[5])
111
 
 
 
112
  proba_pretty = gradio.Label(num_top_classes=2, label="Probability of Using Phone Before Bed")
113
+ prediction_output = gradio.Markdown()
114
 
 
115
  inputs = [phone_hours, computer_hours, device_count, sleep_quality_label, sleep_time, sleep_hours]
 
116
  outputs = [proba_pretty, prediction_output]
117
  for comp in inputs:
118
  comp.change(fn=do_predict, inputs=inputs, outputs=outputs)
 
126
  )
127
 
128
  if __name__ == "__main__":
129
+ demo.launch(debug=False)