# app.py — Gradio + AutoGluon book-classification demo.
# Provenance: FaiyazAzam, "Update app.py", commit 34b0951 (verified).
import os
import pandas as pd
import gradio as gr
from huggingface_hub import snapshot_download
from autogluon.tabular import TabularPredictor
# Hugging Face model repo that holds the exported AutoGluon predictor.
MODEL_REPO_ID = "bareethul/AutoML-books-classification"
# Subdirectory of the snapshot containing the predictor artifacts.
PREDICTOR_SUBDIR = None # None to auto-discover
def find_predictor_dir(root: str) -> str:
    """Locate the deepest directory under *root* holding a saved AutoGluon predictor.

    A predictor directory is recognized by containing both ``learner.pkl``
    and ``predictor.pkl``. Among all matches, the longest path (deepest
    nesting) is returned so that nested snapshot layouts resolve to the
    actual model folder rather than a parent.

    Raises:
        FileNotFoundError: if no matching directory exists under *root*.
    """
    required = {"learner.pkl", "predictor.pkl"}
    candidates = [
        dirpath
        for dirpath, _, filenames in os.walk(root)
        if required.issubset(filenames)
    ]
    if not candidates:
        raise FileNotFoundError(
            f"Could not find an AutoGluon predictor folder under {root}. "
            "Repo must contain a directory with learner.pkl and predictor.pkl."
        )
    # Stable sort by path length; the last entry is the deepest match.
    return sorted(candidates, key=len)[-1]
def load_predictor_from_repo(repo_id: str, subdir: str | None = None) -> TabularPredictor:
    """Download a model snapshot from the Hub and load its AutoGluon predictor.

    Args:
        repo_id: Hugging Face model-repo identifier.
        subdir: Path of the predictor directory relative to the snapshot
            root; when ``None``, the directory is auto-discovered.
    """
    snapshot_root = snapshot_download(repo_id=repo_id, repo_type="model")
    if subdir:
        predictor_dir = os.path.join(snapshot_root, subdir)
    else:
        predictor_dir = find_predictor_dir(snapshot_root)
    print("Using predictor dir:", predictor_dir, flush=True)
    # Allow loading artifacts that were saved under a different Python minor version.
    return TabularPredictor.load(predictor_dir, require_py_version_match=False)
# Load the predictor once at import time so requests don't re-load the model.
PREDICTOR = load_predictor_from_repo(MODEL_REPO_ID, PREDICTOR_SUBDIR)
TARGET_COL = PREDICTOR.label  # name of the column the model predicts
ALL_FEATURES = PREDICTOR.feature_metadata.get_features()  # features expected at inference
PROBLEM_TYPE = PREDICTOR.problem_type  # e.g. binary/multiclass — shown in the page header
def safe_get_models(predictor) -> list:
    """Best-effort list of trained model names for *predictor*.

    Tries, in order:
      1. ``predictor.model_names()`` — AutoGluon >= 1.0 API;
      2. ``predictor.get_model_names()`` — legacy API (pre-1.0), which the
         original code relied on exclusively;
      3. the ``model`` column of ``predictor.leaderboard()``, first with the
         legacy ``silent=True`` kwarg, then without it (the kwarg was removed
         in newer AutoGluon releases).

    Returns an empty list when every strategy fails, so the UI can simply
    hide the model-selection dropdown.
    """
    for getter_name in ("model_names", "get_model_names"):
        try:
            return list(getattr(predictor, getter_name)())
        except Exception:
            continue
    for kwargs in ({"silent": True}, {}):
        try:
            lb = predictor.leaderboard(**kwargs)
            return lb["model"].tolist()
        except Exception:
            continue
    return []
# Names of trained base/ensemble models (may be empty if discovery failed).
ALL_MODELS = safe_get_models(PREDICTOR)
HAS_MODEL_CHOICES = len(ALL_MODELS) > 0
# Show the well-known bookshelf columns first in the UI; any remaining model
# features are appended after them in their original order.
PREFERRED_FEATURE_ORDER = ["FictionorNonfiction","NumPages","ThicknessInches","ReadUnfinishedorUnread"]
ORDERED_FEATURES = [c for c in PREFERRED_FEATURE_ORDER if c in ALL_FEATURES] + [c for c in ALL_FEATURES if c not in PREFERRED_FEATURE_ORDER]
# Choice lists for the two categorical widgets.
DD_FICTION = ["Fiction", "Nonfiction"]
DD_READ_STATUS = ["Read", "Unfinished", "Unread"]
# Initial widget values.
DEFAULTS = {"FictionorNonfiction":"Fiction","NumPages":250,"ThicknessInches":0.85,"ReadUnfinishedorUnread":"Unread"}
def build_input_df(values_dict):
    """Assemble a single-row DataFrame in the feature order the model expects.

    Features absent from *values_dict* are filled with ``None``.
    """
    record = {}
    for feature in ORDERED_FEATURES:
        record[feature] = values_dict.get(feature)
    return pd.DataFrame([record], columns=ORDERED_FEATURES)
def predict_one(fiction_nonfiction, num_pages, thickness, read_status, decision_threshold, selected_model):
    """Run a single-record prediction and return (summary table, class probabilities).

    Args:
        fiction_nonfiction: "Fiction" / "Nonfiction" radio value.
        num_pages: page-count slider value (coerced to int).
        thickness: thickness-in-inches slider value (coerced to float).
        read_status: "Read" / "Unfinished" / "Unread" radio value.
        decision_threshold: probability cutoff; applied only when exactly two
            probability columns are returned (binary problems).
        selected_model: optional base-model name; "" or None means auto.

    Returns:
        Tuple of (one-row pd.DataFrame summary, dict of class -> probability,
        or None when probabilities are unavailable).
    """
    # Only pass along the features the loaded model actually expects.
    values = {}
    if "FictionorNonfiction" in ORDERED_FEATURES: values["FictionorNonfiction"] = fiction_nonfiction
    if "NumPages" in ORDERED_FEATURES: values["NumPages"] = int(num_pages)
    if "ThicknessInches" in ORDERED_FEATURES: values["ThicknessInches"] = float(thickness)
    if "ReadUnfinishedorUnread" in ORDERED_FEATURES: values["ReadUnfinishedorUnread"] = read_status
    X = build_input_df(values)
    # probabilities (if available) — any failure falls back to None rather than crashing the UI
    try:
        use_model = selected_model if selected_model else None
        proba = PREDICTOR.predict_proba(X, model=use_model)
        if isinstance(proba, pd.Series): # normalize binary
            proba = proba.to_frame().T
        proba_pretty = {str(k): float(v) for k, v in proba.iloc[0].to_dict().items()}
    except Exception:
        proba, proba_pretty = None, None
    # default predicted label
    pred_series = PREDICTOR.predict(X, model=(selected_model or None))
    final_label = str(pred_series.iloc[0])
    # Optional threshold (binary only): overrides the default label.
    if proba is not None and len(proba.columns) == 2:
        classes = list(map(str, proba.columns))
        # "Yes" is treated as the positive class when present; otherwise the last column.
        positive_class = "Yes" if "Yes" in classes else classes[-1]
        p_pos = float(proba.iloc[0][positive_class])
        final_label = positive_class if p_pos >= decision_threshold else [c for c in classes if c != positive_class][0]
    top_conf = None
    if proba_pretty:
        top_conf = round(100.0 * float(proba_pretty.get(final_label, 0.0)), 2)
    table = pd.DataFrame([{"Predicted": final_label,
                           "Confidence (%)": top_conf if top_conf is not None else "—",
                           "Model (optional)": (selected_model or "(auto)"),
                           "Target": TARGET_COL}])
    return table, proba_pretty
# UI copy shown in the page header.
APP_TITLE = "📚 Book Insights — Will I Recommend It?"
APP_SUBTITLE = "Single-record tabular inference on a bookshelf dataset (AutoGluon + Gradio)"

with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(
        f"# {APP_TITLE}\n{APP_SUBTITLE}\n\n"
        f"**Target:** `{TARGET_COL}` • **Problem:** `{PROBLEM_TYPE}`"
    )
    with gr.Row():
        with gr.Column():
            # Each input widget is hidden when the trained model does not use that feature.
            fiction_nonfiction = gr.Radio(DD_FICTION, value=DEFAULTS["FictionorNonfiction"],
                                          label="Fiction or Nonfiction", visible=("FictionorNonfiction" in ORDERED_FEATURES))
            read_status = gr.Radio(DD_READ_STATUS, value=DEFAULTS["ReadUnfinishedorUnread"],
                                   label="Read / Unfinished / Unread", visible=("ReadUnfinishedorUnread" in ORDERED_FEATURES))
            num_pages = gr.Slider(50, 1200, step=1, value=DEFAULTS["NumPages"],
                                  label="Number of Pages", visible=("NumPages" in ORDERED_FEATURES))
            thickness = gr.Slider(0.2, 3.0, step=0.01, value=DEFAULTS["ThicknessInches"],
                                  label="Thickness (inches)", visible=("ThicknessInches" in ORDERED_FEATURES))
            with gr.Accordion("Inference Parameters", open=False):
                decision_threshold = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Decision Threshold (binary only)")
                # "" means "no restriction" — let AutoGluon pick the model.
                base_model = gr.Dropdown(choices=([""] + ALL_MODELS) if HAS_MODEL_CHOICES else [""], value="",
                                         label="Restrict to a specific base model (optional)", visible=HAS_MODEL_CHOICES)
            run_btn = gr.Button("Run Prediction")
        with gr.Column():
            # No height/wrap args for broader Gradio compatibility
            out_table = gr.Dataframe(headers=["Predicted","Confidence (%)","Model (optional)","Target"],
                                     interactive=False, label="Prediction")
            out_probs = gr.Label(num_top_classes=5, label="Class Probabilities (top-k)")
    # When no model choices exist, a hidden State("") stands in for the dropdown
    # so predict_one always receives six inputs in the same order.
    inputs = [fiction_nonfiction, num_pages, thickness, read_status, decision_threshold,
              base_model if HAS_MODEL_CHOICES else gr.State("")]
    run_btn.click(predict_one, inputs=inputs, outputs=[out_table, out_probs])

    # [Fiction/Nonfiction, NumPages, ThicknessInches, ReadStatus, DecisionThreshold, BaseModel(or "")]
    _tail = [""] # placeholder for the optional base-model input (works whether visible or hidden)
    examples = [
        ["Fiction", 211, 0.84, "Unread", 0.50] + _tail,
        ["Fiction", 361, 0.99, "Unfinished", 0.50] + _tail,
        ["Nonfiction", 260, 0.95, "Unfinished", 0.50] + _tail,
        ["Fiction", 402, 1.31, "Read", 0.60] + _tail,
        ["Fiction", 122, 0.52, "Unread", 0.40] + _tail,
    ]
    gr.Examples(
        examples=examples,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False, # keep it live with your current model
    )
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the console/UI during development.
    demo.launch(debug=True)