|
|
| import os |
| import pandas as pd |
| import gradio as gr |
| from huggingface_hub import snapshot_download |
| from autogluon.tabular import TabularPredictor |
|
|
# Hugging Face Hub model repo that hosts the trained AutoGluon predictor.
MODEL_REPO_ID = "bareethul/AutoML-books-classification"
# Optional subdirectory inside the downloaded snapshot that holds the
# predictor; None means "auto-discover" via find_predictor_dir().
PREDICTOR_SUBDIR = None
|
|
def find_predictor_dir(root: str) -> str:
    """Locate the AutoGluon predictor directory somewhere under *root*.

    A predictor directory is one that contains both ``learner.pkl`` and
    ``predictor.pkl``. If several directories qualify, the one with the
    longest path string (the most specific hit) is returned.

    Raises:
        FileNotFoundError: if no directory under *root* qualifies.
    """
    candidates = [
        dirpath
        for dirpath, _, filenames in os.walk(root)
        if "learner.pkl" in filenames and "predictor.pkl" in filenames
    ]
    if not candidates:
        raise FileNotFoundError(
            f"Could not find an AutoGluon predictor folder under {root}. "
            "Repo must contain a directory with learner.pkl and predictor.pkl."
        )
    # Stable sort by path length; the last entry is the longest path.
    candidates.sort(key=lambda p: len(p))
    return candidates[-1]
|
|
def load_predictor_from_repo(repo_id: str, subdir: str | None = None) -> TabularPredictor:
    """Download the model snapshot from the Hub and load its predictor.

    When *subdir* is given it is resolved relative to the snapshot root;
    otherwise the predictor directory is auto-discovered.
    """
    snapshot_root = snapshot_download(repo_id=repo_id, repo_type="model")
    if subdir:
        predictor_dir = os.path.join(snapshot_root, subdir)
    else:
        predictor_dir = find_predictor_dir(snapshot_root)
    print("Using predictor dir:", predictor_dir, flush=True)
    # Allow loading artifacts trained under a different Python version.
    return TabularPredictor.load(predictor_dir, require_py_version_match=False)
|
|
# Load the predictor once at import time so every request reuses it.
PREDICTOR = load_predictor_from_repo(MODEL_REPO_ID, PREDICTOR_SUBDIR)


# Metadata read off the trained predictor: label column, the feature
# columns it was trained on, and the problem type (e.g. "binary").
TARGET_COL = PREDICTOR.label
ALL_FEATURES = PREDICTOR.feature_metadata.get_features()
PROBLEM_TYPE = PREDICTOR.problem_type
|
|
def safe_get_models(predictor) -> list:
    """Best-effort list of the predictor's model names.

    Tries ``get_model_names()`` first, then falls back to reading the
    ``model`` column of the leaderboard. Returns an empty list when both
    approaches fail, so the UI can simply hide the model dropdown.
    """
    try:
        return list(predictor.get_model_names())
    except Exception:
        pass
    try:
        return predictor.leaderboard(silent=True)["model"].tolist()
    except Exception:
        return []
# Model names discovered on the loaded predictor; empty when discovery fails.
ALL_MODELS = safe_get_models(PREDICTOR)
HAS_MODEL_CHOICES = len(ALL_MODELS) > 0


# Show the known bookshelf features first, then any remaining trained
# features in their original order.
PREFERRED_FEATURE_ORDER = ["FictionorNonfiction","NumPages","ThicknessInches","ReadUnfinishedorUnread"]
ORDERED_FEATURES = [c for c in PREFERRED_FEATURE_ORDER if c in ALL_FEATURES] + [c for c in ALL_FEATURES if c not in PREFERRED_FEATURE_ORDER]


# Choice lists for the categorical UI widgets.
DD_FICTION = ["Fiction", "Nonfiction"]
DD_READ_STATUS = ["Read", "Unfinished", "Unread"]


# Initial widget values shown when the app first loads.
DEFAULTS = {"FictionorNonfiction":"Fiction","NumPages":250,"ThicknessInches":0.85,"ReadUnfinishedorUnread":"Unread"}
|
|
def build_input_df(values_dict, columns=None):
    """Build a single-row DataFrame in the predictor's expected column order.

    Args:
        values_dict: mapping of feature name -> value; features missing from
            the mapping are filled with ``None`` so AutoGluon can impute them.
        columns: optional explicit column order. Defaults to the module-level
            ``ORDERED_FEATURES`` (backward-compatible with the original
            one-argument call), but accepting it as a parameter decouples the
            helper from the global and makes it reusable/testable.

    Returns:
        A one-row ``pd.DataFrame`` with exactly the requested columns.
    """
    cols = ORDERED_FEATURES if columns is None else list(columns)
    row = {col: values_dict.get(col) for col in cols}
    return pd.DataFrame([row], columns=cols)
|
|
def predict_one(fiction_nonfiction, num_pages, thickness, read_status, decision_threshold, selected_model):
    """Run a single-row prediction from the Gradio widget values.

    Returns a pair for the two Gradio outputs: a one-row summary DataFrame
    (label, confidence %, model, target) and a {class: probability} dict,
    which is None when predict_proba is unavailable.
    """
    # Collect only the features the trained predictor actually expects;
    # any feature absent from ORDERED_FEATURES is simply skipped.
    values = {}
    if "FictionorNonfiction" in ORDERED_FEATURES: values["FictionorNonfiction"] = fiction_nonfiction
    if "NumPages" in ORDERED_FEATURES: values["NumPages"] = int(num_pages)
    if "ThicknessInches" in ORDERED_FEATURES: values["ThicknessInches"] = float(thickness)
    if "ReadUnfinishedorUnread" in ORDERED_FEATURES: values["ReadUnfinishedorUnread"] = read_status
    X = build_input_df(values)

    # Probabilities are best-effort: predict_proba can fail for some
    # problem types / models, in which case we fall back to labels only.
    try:
        use_model = selected_model if selected_model else None  # "" -> auto
        proba = PREDICTOR.predict_proba(X, model=use_model)
        if isinstance(proba, pd.Series):
            # Normalize the Series form to a one-row DataFrame.
            proba = proba.to_frame().T
        proba_pretty = {str(k): float(v) for k, v in proba.iloc[0].to_dict().items()}
    except Exception:
        proba, proba_pretty = None, None

    # Default label straight from the predictor (empty model string = auto).
    pred_series = PREDICTOR.predict(X, model=(selected_model or None))
    final_label = str(pred_series.iloc[0])

    # Binary case: re-derive the label from the user-chosen threshold.
    # NOTE(review): the positive class is assumed to be "Yes" when present,
    # otherwise the last probability column — confirm against training labels.
    if proba is not None and len(proba.columns) == 2:
        classes = list(map(str, proba.columns))
        positive_class = "Yes" if "Yes" in classes else classes[-1]
        p_pos = float(proba.iloc[0][positive_class])
        final_label = positive_class if p_pos >= decision_threshold else [c for c in classes if c != positive_class][0]

    # Confidence of the displayed label as a percentage (None without probs).
    top_conf = None
    if proba_pretty:
        top_conf = round(100.0 * float(proba_pretty.get(final_label, 0.0)), 2)

    table = pd.DataFrame([{"Predicted": final_label,
                           "Confidence (%)": top_conf if top_conf is not None else "—",
                           "Model (optional)": (selected_model or "(auto)"),
                           "Target": TARGET_COL}])
    return table, proba_pretty
|
|
# UI copy shown at the top of the Gradio page.
APP_TITLE = "📚 Book Insights — Will I Recommend It?"
APP_SUBTITLE = "Single-record tabular inference on a bookshelf dataset (AutoGluon + Gradio)"


with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(
        f"# {APP_TITLE}\n{APP_SUBTITLE}\n\n"
        f"**Target:** `{TARGET_COL}` • **Problem:** `{PROBLEM_TYPE}`"
    )
    with gr.Row():
        with gr.Column():
            # Each widget is hidden when its feature is absent from the
            # trained model, so the UI adapts to the predictor's schema.
            fiction_nonfiction = gr.Radio(DD_FICTION, value=DEFAULTS["FictionorNonfiction"],
                                          label="Fiction or Nonfiction", visible=("FictionorNonfiction" in ORDERED_FEATURES))
            read_status = gr.Radio(DD_READ_STATUS, value=DEFAULTS["ReadUnfinishedorUnread"],
                                   label="Read / Unfinished / Unread", visible=("ReadUnfinishedorUnread" in ORDERED_FEATURES))
            num_pages = gr.Slider(50, 1200, step=1, value=DEFAULTS["NumPages"],
                                  label="Number of Pages", visible=("NumPages" in ORDERED_FEATURES))
            thickness = gr.Slider(0.2, 3.0, step=0.01, value=DEFAULTS["ThicknessInches"],
                                  label="Thickness (inches)", visible=("ThicknessInches" in ORDERED_FEATURES))
            with gr.Accordion("Inference Parameters", open=False):
                # Threshold only affects binary problems (see predict_one).
                decision_threshold = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Decision Threshold (binary only)")
                # "" means "let AutoGluon pick the best model".
                base_model = gr.Dropdown(choices=([""] + ALL_MODELS) if HAS_MODEL_CHOICES else [""], value="",
                                         label="Restrict to a specific base model (optional)", visible=HAS_MODEL_CHOICES)
            run_btn = gr.Button("Run Prediction")
        with gr.Column():
            out_table = gr.Dataframe(headers=["Predicted","Confidence (%)","Model (optional)","Target"],
                                     interactive=False, label="Prediction")
            out_probs = gr.Label(num_top_classes=5, label="Class Probabilities (top-k)")
    # NOTE(review): when no model choices exist, a gr.State("") stands in for
    # the dropdown so predict_one still receives 6 inputs — verify gr.Examples
    # accepts a State component in its inputs list.
    inputs = [fiction_nonfiction, num_pages, thickness, read_status, decision_threshold,
              base_model if HAS_MODEL_CHOICES else gr.State("")]
    run_btn.click(predict_one, inputs=inputs, outputs=[out_table, out_probs])

    # Each example row appends "" for the optional model-selector input.
    _tail = [""]
    examples = [
        ["Fiction", 211, 0.84, "Unread", 0.50] + _tail,
        ["Fiction", 361, 0.99, "Unfinished", 0.50] + _tail,
        ["Nonfiction", 260, 0.95, "Unfinished", 0.50] + _tail,
        ["Fiction", 402, 1.31, "Read", 0.60] + _tail,
        ["Fiction", 122, 0.52, "Unread", 0.40] + _tail,
    ]

    gr.Examples(
        examples=examples,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )
|
|
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the browser; acceptable for a demo.
    demo.launch(debug=True)
|
|