Spaces:

zacCMU
/

24679_Tabular

Sleeping

App Files Files Community

24679_Tabular / app.py

zacCMU

Update app.py from Colab

adc0faa verified 4 months ago

raw

history blame contribute delete

6.4 kB

	import os # For filesystem operations
	import shutil # For directory cleanup
	import zipfile # For extracting model archives
	import pathlib # For path manipulations
	import pandas # For tabular data handling
	import gradio # For interactive UI
	import huggingface_hub # For downloading model assets
	import autogluon.tabular # For loading and running AutoGluon predictors
	from huggingface_hub import HfApi
	# Settings
	# Hugging Face Hub API client.
	# NOTE(review): `api` is not referenced anywhere else in the visible file —
	# confirm nothing depends on it before removing.
	api = HfApi()

	# Where the zipped predictor lives on the Hugging Face Hub
	MODEL_REPO_ID = "jennifee/classical_automl_model"
	ZIP_FILENAME = "autogluon_predictor_dir.zip"

	# Local folders: the download cache, and the extraction target nested inside it
	CACHE_DIR = pathlib.Path("hf_assets")
	EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

	# Input columns (do_predict addresses them by position) and the target column
	FEATURE_COLS = [
	    "phone_hours",
	    "computer_hours",
	    "device_count",
	    "use_before_bed",
	    "sleep_time",
	    "sleep_hours",
	]
	TARGET_COL = "sleep_quality"

	# Five-point Likert answers; each label is encoded as its position in the list
	LIKERT5_LABELS = ["Never", "Rarely", "Sometimes", "Often", "Very Often"]
	LIKERT5_MAP = dict(zip(LIKERT5_LABELS, range(len(LIKERT5_LABELS))))

	# Display text for each raw class value of the target
	OUTCOME_LABELS = {0: "Low Sleep Quality", 1: "High Sleep Quality"}

	# Fetch the predictor archive from the Hub and unpack it locally
	def _prepare_predictor_dir() -> str:
	    """Download and extract the zipped predictor, returning its directory.

	    The archive named by ZIP_FILENAME is downloaded from MODEL_REPO_ID into
	    CACHE_DIR, then unpacked into a freshly recreated EXTRACT_DIR.

	    Returns:
	        The path (as a string) of the single top-level directory of the
	        archive when the extraction produced exactly one directory entry,
	        otherwise the path of EXTRACT_DIR itself.
	    """
	    CACHE_DIR.mkdir(parents=True, exist_ok=True)
	    archive_path = huggingface_hub.hf_hub_download(
	        repo_id=MODEL_REPO_ID,
	        filename=ZIP_FILENAME,
	        repo_type="model",
	        local_dir=str(CACHE_DIR),
	        local_dir_use_symlinks=False,
	    )

	    # Always start from an empty extraction folder so stale files never linger
	    if EXTRACT_DIR.exists():
	        shutil.rmtree(EXTRACT_DIR)
	    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

	    with zipfile.ZipFile(archive_path, "r") as archive:
	        archive.extractall(str(EXTRACT_DIR))

	    # An archive that wraps everything in one folder: descend into that folder
	    entries = list(EXTRACT_DIR.iterdir())
	    if len(entries) == 1 and entries[0].is_dir():
	        return str(entries[0])
	    return str(EXTRACT_DIR)

	# Both statements run at import time: the archive is downloaded and unpacked,
	# then the predictor is loaded once into a module-level object that
	# do_predict uses for every request.
	PREDICTOR_DIR = _prepare_predictor_dir()
	# NOTE(review): require_py_version_match=False presumably allows loading a
	# predictor saved under a different Python version — confirm against the
	# AutoGluon documentation.
	PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)


	# Map a raw class value onto its human-readable outcome label
	def _human_label(c):
	    """Return the display label for a raw class value.

	    Args:
	        c: A class value as produced by the predictor or found as a
	            probability column name (for example 0, 1, "1" or 1.0).

	    Returns:
	        The matching entry of OUTCOME_LABELS when ``c`` is, or losslessly
	        converts to, one of its integer keys; otherwise ``str(c)``.
	    """
	    try:
	        ci = int(c)
	    except (TypeError, ValueError, OverflowError):
	        # Not convertible (None, "abc", NaN, infinity, ...): no integer key
	        ci = None

	    if ci is not None and ci in OUTCOME_LABELS:
	        # int() truncates, so int(0.7) == 0. Accept the converted value only
	        # when nothing was lost: text is parsed exactly, numbers must compare
	        # equal to their integer form.
	        if isinstance(c, (str, bytes, bytearray)) or ci == c:
	            return OUTCOME_LABELS[ci]

	    try:
	        if c in OUTCOME_LABELS:
	            return OUTCOME_LABELS[c]
	    except TypeError:
	        # Unhashable values can never be dictionary keys; use the plain text
	        pass
	    return str(c)

	# Collect the class probabilities of a single-row frame as {label: probability}
	def _class_probabilities(X):
	    """Return human-labelled class probabilities for the first row of ``X``.

	    Returns:
	        A dict mapping display labels to probabilities, ordered from most to
	        least likely, or None when the predictor cannot supply probabilities.
	    """
	    try:
	        proba = PREDICTOR.predict_proba(X)
	    except Exception as e:
	        # Deliberate best effort: a failure here must not lose the prediction
	        print(f"Error getting probabilities: {e}")
	        return None

	    if isinstance(proba, pandas.Series):
	        proba = proba.to_frame().T
	    if not isinstance(proba, pandas.DataFrame) or proba.empty:
	        print("Probability DataFrame is empty or not a DataFrame.")
	        return None

	    totals = {}
	    for cls, val in proba.iloc[0].items():
	        # Distinct raw classes may share a display label, so accumulate
	        key = _human_label(cls)
	        totals[key] = totals.get(key, 0.0) + float(val)
	    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))


	# Encode one set of UI inputs, run the predictor, and return the result for the Label
	def do_predict(phone_hours, computer_hours, device_count, use_before_bed_label, sleep_time, sleep_hours):
	    """Predict the sleep-quality class for one set of inputs.

	    Args:
	        phone_hours: Value for the 'phone_hours' column (converted to float).
	        computer_hours: Value for the 'computer_hours' column (converted to float).
	        device_count: Value for the 'device_count' column (converted to int).
	        use_before_bed_label: One of LIKERT5_LABELS; encoded through LIKERT5_MAP.
	        sleep_time: Value for the 'sleep_time' column (converted to float).
	        sleep_hours: Value for the 'sleep_hours' column (converted to float).

	    Returns:
	        A dict of display label -> probability, most likely class first.
	        When probabilities are unavailable, the predicted display label alone
	        (a str) is returned so the prediction is still shown.

	    Raises:
	        gradio.Error: If any input is missing or the Likert answer is unknown.
	    """
	    numeric_inputs = (phone_hours, computer_hours, device_count, sleep_time, sleep_hours)
	    if any(value is None for value in numeric_inputs) or use_before_bed_label not in LIKERT5_MAP:
	        # A cleared field arrives as None; report it instead of a raw traceback
	        raise gradio.Error("Please provide a value for every input.")

	    # Values are listed in FEATURE_COLS order; the target column is never an input
	    values = (
	        float(phone_hours),
	        float(computer_hours),
	        int(device_count),
	        int(LIKERT5_MAP[use_before_bed_label]),
	        float(sleep_time),
	        float(sleep_hours),
	    )
	    X = pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)

	    pred_label = _human_label(PREDICTOR.predict(X).iloc[0])

	    proba_dict = _class_probabilities(X)
	    if not proba_dict:
	        # No probabilities: still surface the predicted class
	        return pred_label
	    return proba_dict

	# Sample input rows offered in the UI. Column order follows FEATURE_COLS:
	# phone_hours, computer_hours, device_count, use_before_bed, sleep_time, sleep_hours
	EXAMPLES = [
	    [2.5, 4.0, 3, "Sometimes", 23.0, 7.0],
	    [1.0, 8.0, 5, "Very Often", 1.0, 5.0],
	    [5.0, 2.0, 2, "Never", 22.5, 8.5],
	    [0.5, 10.0, 4, "Often", 0.0, 6.0],
	    [3.0, 3.0, 1, "Rarely", 23.5, 7.5],
	]

	# Gradio UI
	with gradio.Blocks() as demo:
	    # Page title and a short description of what the app does
	    gradio.Markdown("# Sleep Quality Predictor")
	    gradio.Markdown("""
	This app predicts sleep quality based on device usage and sleep habits.
	Adjust the inputs below to see the predicted sleep quality.
	""")

	    # First row: device usage
	    with gradio.Row():
	        phone_hours = gradio.Slider(
	            minimum=0, maximum=24, step=0.1, value=2.5, label=FEATURE_COLS[0]
	        )
	        computer_hours = gradio.Slider(
	            minimum=0, maximum=24, step=0.1, value=4.0, label=FEATURE_COLS[1]
	        )
	        device_count = gradio.Number(value=3, precision=0, label=FEATURE_COLS[2])

	    # Second row: the Likert-scale question
	    with gradio.Row():
	        use_before_bed_label = gradio.Radio(
	            choices=LIKERT5_LABELS, value="Sometimes", label=FEATURE_COLS[3]
	        )

	    # Third row: sleep habits
	    with gradio.Row():
	        sleep_time = gradio.Slider(
	            minimum=0, maximum=24, step=0.1, value=23.0, label=FEATURE_COLS[4]
	        )
	        sleep_hours = gradio.Slider(
	            minimum=0, maximum=12, step=0.1, value=7.0, label=FEATURE_COLS[5]
	        )

	    # Output: probabilities of the two outcome classes
	    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

	    # Components listed in the order of do_predict's parameters; a change to
	    # any one of them re-runs the prediction with all current values
	    inputs = [phone_hours, computer_hours, device_count, use_before_bed_label, sleep_time, sleep_hours]
	    for widget in inputs:
	        widget.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

	    # Clickable sample rows that fill in the inputs
	    gradio.Examples(
	        examples=EXAMPLES,
	        inputs=inputs,
	        label="Representative examples",
	        examples_per_page=5,
	        cache_examples=False,
	    )

	# Start the app only when the file is run as a script
	if __name__ == "__main__":
	    demo.launch()