# samder03's picture
# Update app.py
# bdfd230 verified
import os # For filesystem operations
import shutil # For directory cleanup
import zipfile # For extracting model archives
import pathlib # For path manipulations
import pandas # For tabular data handling
import gradio # For interactive UI
import huggingface_hub # For downloading model assets
import autogluon.tabular # For loading and running AutoGluon predictors
# Settings: where the zipped predictor lives and where it is cached/unpacked locally
MODEL_REPO_ID = "SebastianAndreu/2025-24679-HW1-Part1-tabular-autolguon-predictor"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

# Feature column names (in the order used to build an input row) and the target column name
FEATURE_COLS = ["length_cm", "weight_g", "is_metallic", "body_shape", "has_clip"]
TARGET_COL = "is_pen"

# Encoding for questions: each answer's integer code is its position in its label list
metallic_labels = ["no", "yes"]
metallic_map = dict(zip(metallic_labels, range(len(metallic_labels))))

shape_labels = ["hexagonal", "other", "round", "triangle"]
shape_map = dict(zip(shape_labels, range(len(shape_labels))))

clip_labels = ["no", "yes"]
clip_map = dict(zip(clip_labels, range(len(clip_labels))))

# Display names for the integer codes of the target class
is_pen_labels = {0: "pencil", 1: "pen"}
# Download & load the native predictor
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor and unpack it into a clean directory.

    The archive ``ZIP_FILENAME`` is fetched from the ``MODEL_REPO_ID`` model
    repository into ``CACHE_DIR`` and extracted into ``EXTRACT_DIR``, which is
    wiped first if it already exists.

    Returns:
        The path (as ``str``) of the directory to load the predictor from:
        the single top-level folder of the archive when the archive contains
        exactly one directory entry, otherwise ``EXTRACT_DIR`` itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    download_kwargs = dict(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )
    archive_path = huggingface_hub.hf_hub_download(**download_kwargs)

    # Always extract into an empty directory so files from an earlier
    # extraction cannot linger next to the new ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # An archive that wraps everything in one folder yields exactly one
    # directory entry; in that case the wrapped folder is the predictor root.
    entries = list(EXTRACT_DIR.iterdir())
    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)
# Runs at import time: download + extract the archive, then load the predictor
# once so every prediction call reuses the same in-memory object.
PREDICTOR_DIR = _prepare_predictor_dir()
# NOTE(review): require_py_version_match=False presumably allows loading when the
# runtime Python version differs from the training one — confirm against AutoGluon docs.
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)
# A mapping utility that converts a raw class value into its human-readable label
def _human_label(c):
    """Return the display label for a raw class value.

    The value is first coerced to ``int`` and looked up in ``is_pen_labels``;
    if the coercion fails or the code is unknown, the raw value itself is
    looked up. When neither lookup matches, ``str(c)`` is returned.
    """
    # Best-effort integer lookup: any failure here simply falls through to
    # the raw-value lookup below.
    try:
        code = int(c)
        if code in is_pen_labels:
            return is_pen_labels[code]
    except Exception:
        pass

    return is_pen_labels[c] if c in is_pen_labels else str(c)
# This function takes all of our features, encodes them accordingly, and performs a prediction
def do_predict(length, weight, metallic, shape, clip):
    """Classify one writing utensil and return its class probabilities.

    Args:
        length: Value for the first feature column (``FEATURE_COLS[0]``);
            converted with ``float``.
        weight: Value for the second feature column (``FEATURE_COLS[1]``);
            converted with ``float``.
        metallic: A key of ``metallic_map``.
        shape: A key of ``shape_map``.
        clip: A key of ``clip_map``.

    Returns:
        A dict mapping human-readable class label to probability, ordered
        from most to least probable. If class probabilities cannot be
        obtained from the predictor, the dict holds only the predicted label
        with probability ``1.0`` so callers always receive a usable result.

    Raises:
        KeyError: If ``metallic``, ``shape`` or ``clip`` is not a known label.
    """
    feature_values = (
        float(length),
        float(weight),
        int(metallic_map[metallic]),
        int(shape_map[shape]),
        int(clip_map[clip]),
    )
    X = pandas.DataFrame(
        [dict(zip(FEATURE_COLS, feature_values))],
        columns=FEATURE_COLS,
    )

    # Probabilities are best-effort: a predictor that cannot provide them
    # must not break the UI, so any failure switches to the fallback below.
    try:
        proba = PREDICTOR.predict_proba(X)
        if isinstance(proba, pandas.Series):
            proba = proba.to_frame().T
    except Exception:
        proba = None

    if proba is None:
        # Fallback: report the hard prediction with full confidence instead
        # of returning nothing. The hard prediction is only computed here,
        # since it is not needed when probabilities are available.
        pred_label = _human_label(PREDICTOR.predict(X).iloc[0])
        return {pred_label: 1.0}

    # Different raw class values can map to the same display label, so
    # probabilities are summed per label rather than overwritten.
    totals = {}
    for cls, val in proba.iloc[0].items():
        key = _human_label(cls)
        totals[key] = totals.get(key, 0.0) + float(val)

    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))
# Representative examples
# Each row supplies one value per UI input, in the order of the `inputs` list
# passed to gradio.Examples: [length, weight, metallic, shape, clip].
# NOTE(review): the weight values below (120.0 to 8000.0) exceed the 0-50 range
# of the slider bound to `weight` — confirm the intended units/range.
EXAMPLES = [
[5.0, 300.0, "no", "hexagonal", "yes"],
[18.0, 1500.0, "yes", "triangle", "no"],
[12.0, 8000.0, "no", "other", "yes"],
[4.0, 120.0, "yes", "round", "no"],
[22.0, 500.0, "yes", "hexagonal", "no"],
]
# Gradio UI: two sliders and three radio groups feed do_predict; the resulting
# class probabilities are shown in a Label component.
with gradio.Blocks() as demo:
    # Provide an introduction
    gradio.Markdown("# Is it a pen or a pencil?")
    gradio.Markdown("""
This is an app that can determine if a writing utensil is a pen or pencil. To use the interface, make
selections using the interface elements shown below.
""")

    # Numeric features
    with gradio.Row():
        length = gradio.Slider(0, 50, step=0.1, value=10.0, label="length")
        # This slider feeds the weight feature, so it is labelled "weight"
        # (it was previously mislabelled "width").
        # NOTE(review): EXAMPLES contains weight values above this slider's
        # maximum of 50 — confirm the intended range.
        weight = gradio.Slider(0, 50, step=0.1, value=10.0, label="weight")

    # Categorical features; choices come from the same label lists used to
    # build the encoding maps, so every selectable value can be encoded.
    with gradio.Row():
        metallic = gradio.Radio(choices=metallic_labels, value="no", label="Is it metallic?")
        shape = gradio.Radio(choices=shape_labels, value="hexagonal", label="What shape is it?")
        clip = gradio.Radio(choices=clip_labels, value="no", label="Does it have a clip")

    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Re-run the prediction whenever any single input changes; every handler
    # receives the full set of inputs.
    inputs = [length, weight, metallic, shape, clip]
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()