|
|
import os |
|
|
import shutil |
|
|
import zipfile |
|
|
import pathlib |
|
|
import pandas |
|
|
import gradio |
|
|
import huggingface_hub |
|
|
import autogluon.tabular |
|
|
|
|
|
|
|
|
# Hugging Face Hub repository and file name of the zipped predictor that
# _prepare_predictor_dir() downloads.
MODEL_REPO_ID = "SebastianAndreu/2025-24679-HW1-Part1-tabular-autolguon-predictor"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Local working directories: the archive is downloaded into CACHE_DIR and
# unpacked into EXTRACT_DIR, which lives beneath it.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"
|
|
|
|
|
|
|
|
# Model input columns. do_predict() fills them positionally, so the order here
# must match the order of its arguments (length, weight, metallic, shape, clip).
FEATURE_COLS = [
    "length_cm",
    "weight_g",
    "is_metallic",
    "body_shape",
    "has_clip",
]

# Name of the target column (presumably the label the predictor was trained
# on -- confirm; it is not referenced by the code visible in this file).
TARGET_COL = "is_pen"
|
|
|
|
|
|
|
|
# Choices offered in the UI for each categorical input. A label's position in
# its list is the integer code that do_predict() places in the feature row.
metallic_labels = ["no", "yes"]
metallic_map = {label: idx for idx, label in enumerate(metallic_labels)}

shape_labels = ["hexagonal", "other", "round", "triangle"]
shape_map = {label: idx for idx, label in enumerate(shape_labels)}

clip_labels = ["no", "yes"]
clip_map = {label: idx for idx, label in enumerate(clip_labels)}

# Display names for the predictor's class values, used by _human_label().
is_pen_labels = {
    0: "pencil",
    1: "pen",
}
|
|
|
|
|
|
|
|
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor from the Hub and unpack it locally.

    ``ZIP_FILENAME`` is fetched from ``MODEL_REPO_ID`` into ``CACHE_DIR``.
    ``EXTRACT_DIR`` is deleted and recreated on every call, so a previous
    extraction is never reused.

    Returns:
        The directory to load the predictor from, as a ``str``: the single
        top-level folder of the archive when the extraction produced exactly
        one entry and that entry is a directory, otherwise ``EXTRACT_DIR``.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # ``local_dir_use_symlinks`` is deliberately not passed: the argument is
    # deprecated in current huggingface_hub releases, and the zip can be read
    # equally well whether the downloaded path is a real file or a symlink.
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
    )

    # Start from an empty directory so files from an older archive cannot
    # linger next to the freshly extracted ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # If the archive wrapped everything in one folder, that folder is the
    # predictor root; otherwise the files sit directly in EXTRACT_DIR.
    contents = list(EXTRACT_DIR.iterdir())
    if len(contents) == 1 and contents[0].is_dir():
        return str(contents[0])
    return str(EXTRACT_DIR)
|
|
|
|
|
# Download and load the predictor once, at import time; do_predict() reuses
# this single PREDICTOR instance for every call.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
)
|
|
|
|
|
|
|
|
def _human_label(c):
    """Translate a raw class value into its display name.

    Args:
        c: A class value such as ``0``/``1``, or anything ``int()`` can
            convert to one of the keys of ``is_pen_labels`` (e.g. ``"1"``).

    Returns:
        ``is_pen_labels[int(c)]`` when ``int(c)`` succeeds and is a key of the
        mapping; otherwise ``is_pen_labels[c]`` when ``c`` itself is a key;
        otherwise ``str(c)``.
    """
    try:
        as_int = int(c)
    except (TypeError, ValueError, OverflowError):
        # Not integer-like (None, arbitrary text, inf/nan, ...): fall through
        # to a lookup by the raw value.
        as_int = None

    if as_int is not None and as_int in is_pen_labels:
        return is_pen_labels[as_int]

    try:
        return is_pen_labels[c]
    except (KeyError, TypeError):
        # KeyError: unknown class; TypeError: unhashable value. Either way the
        # best available label is the value's own text.
        return str(c)
|
|
|
|
|
|
|
|
def do_predict(length, weight, metallic, shape, clip):
    """Classify one writing utensil from the UI inputs.

    Args:
        length: Value for the first feature column; converted with ``float()``.
        weight: Value for the second feature column; converted with ``float()``.
        metallic: A key of ``metallic_map`` (one of ``metallic_labels``).
        shape: A key of ``shape_map`` (one of ``shape_labels``).
        clip: A key of ``clip_map`` (one of ``clip_labels``).

    Returns:
        A dict mapping display label to probability, ordered from most to
        least probable. If ``PREDICTOR.predict_proba`` raises, the predicted
        display label is returned as a plain ``str`` instead, so the
        ``gradio.Label`` output still shows the prediction.

    Raises:
        KeyError: If ``metallic``, ``shape`` or ``clip`` is not a known choice.
    """
    # Encode the categorical choices as the integer codes the model expects.
    metallic_code = metallic_map[metallic]
    shape_code = shape_map[shape]
    clip_code = clip_map[clip]

    values = (float(length), float(weight), metallic_code, shape_code, clip_code)
    features = pandas.DataFrame(
        [dict(zip(FEATURE_COLS, values))],
        columns=FEATURE_COLS,
    )

    pred_label = _human_label(PREDICTOR.predict(features).iloc[0])

    try:
        proba = PREDICTOR.predict_proba(features)
    except Exception:
        # Deliberate best-effort: probabilities are optional. Fall back to the
        # bare label rather than discarding the prediction already made.
        return pred_label

    if isinstance(proba, pandas.Series):
        # NOTE(review): a Series is turned into a one-row frame here; whether
        # its index then holds class values depends on how predict_proba
        # shaped its result -- confirm against the AutoGluon version in use.
        proba = proba.to_frame().T

    # Sum probabilities per display label, in case several raw class values
    # map to the same name.
    totals = {}
    for cls, val in proba.iloc[0].items():
        name = _human_label(cls)
        totals[name] = totals.get(name, 0.0) + float(val)

    return dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))
|
|
|
|
|
|
|
|
# Example rows for the UI, each ordered as the input components are:
# [length, weight, metallic, shape, clip].
EXAMPLES = [
    [5.0, 300.0, "no", "hexagonal", "yes"],
    [18.0, 1500.0, "yes", "triangle", "no"],
    [12.0, 8000.0, "no", "other", "yes"],
    [4.0, 120.0, "yes", "round", "no"],
    [22.0, 500.0, "yes", "hexagonal", "no"],
]
|
|
|
|
|
|
|
|
# Build the UI. Components and event listeners must be created inside the
# Blocks context so they are attached to ``demo``.
with gradio.Blocks() as demo:
    gradio.Markdown("# Is it a pen or a pencil?")
    gradio.Markdown("""
This is an app that can determine if a writing utensil is a pen or pencil. To use the interface, make
selections using the interface elements shown below.
""")

    with gradio.Row():
        length = gradio.Slider(0, 50, step=0.1, value=10.0, label="length")
        # This slider supplies the ``weight`` argument of do_predict().
        # NOTE(review): every weight in EXAMPLES exceeds this slider's maximum
        # of 50 -- confirm which range is intended.
        weight = gradio.Slider(0, 50, step=0.1, value=10.0, label="weight")

    with gradio.Row():
        metallic = gradio.Radio(choices=metallic_labels, value="no", label="Is it metallic?")
        shape = gradio.Radio(choices=shape_labels, value="hexagonal", label="What shape is it?")
        clip = gradio.Radio(choices=clip_labels, value="no", label="Does it have a clip")

    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Order matters: it must match the parameter order of do_predict() and the
    # column order of each row in EXAMPLES.
    inputs = [length, weight, metallic, shape, clip]

    # Re-run the prediction whenever any single input changes.
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )
|
|
|
|
|
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()