Spaces:

samder03
/

2025-24679-tabular-interface

Sleeping

2025-24679-tabular-interface

File size: 5,351 Bytes

012380e

import os  # For filesystem operations
import shutil  # For directory cleanup
import zipfile  # For extracting model archives
import pathlib  # For path manipulations
import pandas  # For tabular data handling
import gradio  # For interactive UI
import huggingface_hub  # For downloading model assets
import autogluon.tabular  # For loading and running AutoGluon predictors

# Settings: where the packaged predictor lives on the Hugging Face Hub and
# where it is stored/unpacked locally.
MODEL_REPO_ID = "SebastianAndreu/2025-24679-HW1-Part1-tabular-autolguon-predictor"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

# Feature column names (do_predict indexes this list by position, so the
# order matters) and the target column name.
FEATURE_COLS = ["length_cm", "weight_g", "is_metallic", "body_shape", "has_clip"]
TARGET_COL = "is_pen"

# Encoding for questions: each *_labels list holds the choices shown in the
# UI, and the matching *_map sends a choice to its position in that list.
metallic_labels = ["no", "yes"]
metallic_map = dict(zip(metallic_labels, range(len(metallic_labels))))

shape_labels = ["hexagonal", "other", "round", "triangle"]
shape_map = dict(zip(shape_labels, range(len(shape_labels))))

clip_labels = ["no", "yes"]
clip_map = dict(zip(clip_labels, range(len(clip_labels))))

# Display names for the integer class codes of the target.
is_pen_labels = {0: "pencil", 1: "pen"}

# Download and unpack the native predictor archive
def _prepare_predictor_dir() -> str:
    """Fetch the predictor zip from the Hub and extract it locally.

    The archive is downloaded into CACHE_DIR and unpacked into a freshly
    emptied EXTRACT_DIR on every call.

    Returns:
        The path (as a string) of the directory to load the predictor from:
        the single top-level folder of the archive when the extraction
        produced exactly one directory, otherwise EXTRACT_DIR itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # NOTE(review): `local_dir_use_symlinks` appears to be deprecated in
    # recent huggingface_hub releases -- confirm against the pinned version.
    archive_path = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Drop any previous extraction so stale files never mix with new ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # An archive that wraps everything in one folder: load from that folder.
    entries = list(EXTRACT_DIR.iterdir())
    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)

# Runs at import time: download/extract the predictor files, then load the
# predictor object that do_predict uses.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    # Presumably lets the predictor load under a different Python version
    # than the one it was trained with -- confirm against AutoGluon docs.
    require_py_version_match=False,
)

# A mapping utility to make it easier to encode the variables
def _human_label(c):
    try:
        ci = int(c)
        if ci in is_pen_labels:
            return is_pen_labels[ci]
    except Exception:
        pass
    if c in is_pen_labels:
        return is_pen_labels[c]
    return str(c)

# This function takes all of our features, encodes them accordingly, and performs a prediction
def do_predict(length, weight, metallic, shape, clip):
    """Run the predictor on one item and return its class probabilities.

    Args:
        length: Value for the "length_cm" feature; converted with float().
        weight: Value for the "weight_g" feature; converted with float().
        metallic: One of ``metallic_labels``.
        shape: One of ``shape_labels``.
        clip: One of ``clip_labels``.

    Returns:
        A dict mapping display label to probability, ordered from most to
        least probable. When probabilities cannot be obtained, the predicted
        label alone is returned with probability 1.0.

    Raises:
        KeyError: If metallic, shape or clip is not a known choice.
    """
    values = [
        float(length),
        float(weight),
        int(metallic_map[metallic]),
        int(shape_map[shape]),
        int(clip_map[clip]),
    ]
    # Single-row frame with the columns in FEATURE_COLS order.
    X = pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)

    pred_label = _human_label(PREDICTOR.predict(X).iloc[0])
    # Used whenever no per-class probabilities are available: report the hard
    # prediction at full confidence rather than returning nothing.
    fallback = {pred_label: 1.0}

    try:
        proba = PREDICTOR.predict_proba(X)
    except Exception:
        # Best effort: probabilities are optional, the prediction is not.
        return fallback
    if isinstance(proba, pandas.Series):
        proba = proba.to_frame().T

    # Several raw classes may share one display label, so sum per label.
    totals = {}
    for cls, val in proba.iloc[0].items():
        key = _human_label(cls)
        totals[key] = totals.get(key, 0.0) + float(val)

    ranked = dict(sorted(totals.items(), key=lambda kv: kv[1], reverse=True))
    return ranked or fallback

# Representative examples, one row per example, in the order the UI inputs
# are wired: length, weight, metallic, shape, clip.
# NOTE(review): every weight below is larger than the 0-50 range of the
# weight slider in the UI section -- confirm the intended units/range.
EXAMPLES = [
    [5.0,  300.0,  "no",  "hexagonal", "yes"],
    [18.0, 1500.0, "yes", "triangle",  "no"],
    [12.0, 8000.0, "no",  "other",     "yes"],
    [4.0,  120.0,  "yes", "round",     "no"],
    [22.0, 500.0,  "yes", "hexagonal", "no"],
]

# Gradio UI
with gradio.Blocks() as demo:
    # Provide an introduction
    gradio.Markdown("# Is it a pen or a pencil?")
    gradio.Markdown("""
    This is an app that can determine if a writing utensil is a pen or pencil. To use the interface, make
    selections using the interface elements shown below.
    """)

    # Numeric features
    with gradio.Row():
        length = gradio.Slider(0, 50, step=0.1, value=10.0, label="length")
        # This slider feeds the weight argument of do_predict, so it is
        # labelled "weight" (it previously read "width").
        weight = gradio.Slider(0, 50, step=0.1, value=10.0, label="weight")

    # Categorical features; the choices are the keys of the encoding maps
    with gradio.Row():
        metallic = gradio.Radio(choices=metallic_labels, value="no", label="Is it metallic?")
        shape = gradio.Radio(choices=shape_labels, value="hexagonal", label="What shape is it?")
        clip = gradio.Radio(choices=clip_labels, value="no", label="Does it have a clip")

    # Output: receives the value returned by do_predict
    proba_pretty = gradio.Label(num_top_classes=2, label="Class probabilities")

    # Re-run the prediction whenever any single input changes
    inputs = [length, weight, metallic, shape, clip]
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()