# -*- coding: utf-8 -*-
"""
Gradio app that serves a trained AutoGluon TabularPredictor built on the
ecopus/pokemon_cards dataset.

Users enter details of a Pokémon card — name, release year, set, artwork style,
condition, set-number equivalent, and market value — and the model predicts
whether the card is a collector's item ("Yes" or "No"). The UI also shows the
model's class probabilities so users can see its confidence.

Dataset reference: https://huggingface.co/datasets/ecopus/pokemon_cards
"""

# ----------------------------
# Imports
# ----------------------------
import os
import shutil
import zipfile
import pathlib
from typing import Any, Dict, List, Optional

import pandas as pd
import gradio as gr
import huggingface_hub
import autogluon.tabular

# Optional: pull choices/ranges from the dataset (falls back if unavailable)
try:
    from datasets import load_dataset
    HAS_DATASETS = True
except Exception:
    HAS_DATASETS = False

# ----------------------------
# Settings: point to your trained AutoGluon predictor on the Hub
# ----------------------------
MODEL_REPO_ID = "samder03/2025-24679-tabular-autolguon-predictor"  # <- CHANGE ME
ZIP_FILENAME = "autogluon_predictor_dir.zip"                       # <- CHANGE if different
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"

# Columns must match training-time names exactly:
FEATURE_COLS = [
    "Card",           # string
    "Year",           # int
    "Card Set",       # string
    "Artwork Style",  # string
    "Condition",      # string
    "Set Number Eq",  # float
    "Market Value",   # float
]
TARGET_COL = "Collector's Item"  # binary: "Yes"/"No" in the dataset


# ----------------------------
# Load predictor (download zip from Hub, then autogluon load)
# ----------------------------
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor from the Hub and extract it locally.

    Returns:
        Path (as str) to the extracted AutoGluon predictor directory. If the
        zip contains a single top-level directory, that directory is returned;
        otherwise the extraction root itself is.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        # NOTE(review): local_dir_use_symlinks is deprecated and ignored by
        # recent huggingface_hub releases; kept for older versions where it
        # forced a real copy into local_dir. Safe to drop once the minimum
        # hub version is pinned >= 0.23.
        local_dir_use_symlinks=False,
    )
    # Start from a clean extraction dir so stale files from a previous
    # predictor version can't mix with the new one.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))
    contents = list(EXTRACT_DIR.iterdir())
    # Zips are often rooted in a single folder (e.g. "ag-YYYYMMDD/"); unwrap it.
    predictor_root = contents[0] if (len(contents) == 1 and contents[0].is_dir()) else EXTRACT_DIR
    return str(predictor_root)


# If loading locally instead of the Hub, comment these two lines and set:
# PREDICTOR_DIR = "/path/to/AutogluonModels/ag-"
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)

# ----------------------------
# Helpers
# ----------------------------
# Map the raw labels AutoGluon may emit (strings, ints, bools) to "Yes"/"No".
OUTCOME_LABELS = {
    "Yes": "Yes", "No": "No",
    1: "Yes", 0: "No",
    "1": "Yes", "0": "No",
    True: "Yes", False: "No",
}


def _human_label(x: Any) -> str:
    """Return the display label for a raw model output (fallback: str(x))."""
    return OUTCOME_LABELS.get(x, str(x))


def _normalize_proba_keys(row_probs: Dict[Any, float]) -> Dict[str, float]:
    """Collapse raw probability keys onto display labels, summing duplicates.

    E.g. keys 1 and "1" both map to "Yes" and their masses are added.
    Result is sorted by probability, highest first.
    """
    normalized: Dict[str, float] = {}
    for k, v in row_probs.items():
        key = _human_label(k)
        normalized[key] = float(v) + float(normalized.get(key, 0.0))
    return dict(sorted(normalized.items(), key=lambda kv: kv[1], reverse=True))


# ----------------------------
# Dataset-driven choices/ranges (with safe fallbacks if offline)
# ----------------------------
def get_dataset_metadata() -> dict:
    """
    Try to pull unique choices and numeric ranges from ecopus/pokemon_cards.
    Falls back to hard-coded sensible defaults if the dataset lib or network
    is unavailable.
    """
    # Defaults used when the `datasets` library or the network is missing.
    meta = {
        "card_examples": ["Charizard", "Pikachu", "Mew", "Ivysaur"],
        "card_sets": [
            "Base Set", "Pokemon 151", "Evolutions", "Prismatic Evolutions",
            "Journey Together", "Destined Rivals", "Stellar Crown", "BREAKpoint",
            "EX Sandstorm", "Double Crisis", "McDonalds"
        ],
        "art_styles": [
            "Standard", "Holo", "Reverse Holo", "Full Art", "Full Art Gold",
            "Full Art Rainbow", "Alternate Art", "Trainer Gallery", "Promo",
            # include obvious typo seen in a sample row to avoid surprises:
            "Standart"
        ],
        "conditions": ["Mint", "Near Mint", "Lightly Played", "Heavily Played"],
        "year_min": 1995,
        "year_max": 2025,
        "sne_min": 0.04,
        "sne_max": 1.50,
        "mv_min": 0.08,
        "mv_max": 133.00,
        "examples_rows": [],  # list of example rows matching FEATURE_COLS order
    }
    if not HAS_DATASETS:
        return meta

    try:
        ds = load_dataset("ecopus/pokemon_cards")
        # Merge all splits into one frame.
        frames: List[pd.DataFrame] = [pd.DataFrame(ds[sn]) for sn in ds.keys()]
        df_all = pd.concat(frames, ignore_index=True)

        # Coerce types safely (in case commas exist in displayed values);
        # un-parseable cells become None and are dropped later.
        def _to_int(x):
            try:
                return int(str(x).replace(",", ""))
            except Exception:
                return None

        def _to_float(x):
            try:
                return float(str(x).replace(",", ""))
            except Exception:
                return None

        # Unique dropdown choices, only overriding defaults when non-empty.
        if "Card Set" in df_all.columns:
            sets = sorted({str(s) for s in df_all["Card Set"].dropna().unique().tolist()})
            if sets:
                meta["card_sets"] = sets
        if "Artwork Style" in df_all.columns:
            styles = sorted({str(s) for s in df_all["Artwork Style"].dropna().unique().tolist()})
            if styles:
                # include 'Standart' if present
                meta["art_styles"] = styles
        if "Condition" in df_all.columns:
            conds = sorted({str(s) for s in df_all["Condition"].dropna().unique().tolist()})
            if conds:
                meta["conditions"] = conds

        # Numeric slider ranges.
        if "Year" in df_all.columns:
            years = df_all["Year"].map(_to_int).dropna().tolist()
            if years:
                meta["year_min"] = min(years)
                meta["year_max"] = max(years)
        if "Set Number Eq" in df_all.columns:
            sne = df_all["Set Number Eq"].map(_to_float).dropna().tolist()
            if sne:
                meta["sne_min"] = float(min(sne))
                meta["sne_max"] = float(max(sne))
        if "Market Value" in df_all.columns:
            mv = df_all["Market Value"].map(_to_float).dropna().tolist()
            if mv:
                meta["mv_min"] = float(min(mv))
                meta["mv_max"] = float(max(mv))

        # Example rows (grab up to 5 reasonable examples).
        if all(c in df_all.columns for c in FEATURE_COLS):
            sample = df_all[FEATURE_COLS].dropna().head(5)
            meta["examples_rows"] = sample.values.tolist()

        # Some card names to seed the textbox suggestions.
        if "Card" in df_all.columns:
            meta["card_examples"] = df_all["Card"].dropna().astype(str).head(8).tolist()
    except Exception:
        # Deliberate best-effort: any failure (network, schema drift) just
        # leaves the hard-coded defaults in place.
        pass

    return meta


META = get_dataset_metadata()


# ----------------------------
# Prediction function
# ----------------------------
def do_predict(card_name: str, year: float, card_set: str, artwork_style: str,
               condition: str, set_number_eq: float, market_value: float) -> Dict[str, float]:
    """Predict whether a card is a collector's item.

    Args mirror FEATURE_COLS in order. Returns a {label: probability} dict
    (highest first) suitable for a gr.Label output. If probabilities are
    unavailable, 100% is assigned to the predicted class for display.
    """
    # gr.Number yields None when the field is cleared; returning an empty
    # label dict is friendlier than letting float(None) raise TypeError
    # inside the Gradio callback.
    if year is None or set_number_eq is None or market_value is None:
        return {}

    # Build a single-row DataFrame exactly matching training columns.
    row = {
        "Card": str(card_name).strip(),
        "Year": int(year),
        "Card Set": str(card_set).strip(),
        "Artwork Style": str(artwork_style).strip(),
        "Condition": str(condition).strip(),
        "Set Number Eq": float(set_number_eq),
        "Market Value": float(market_value),
    }
    X = pd.DataFrame([row], columns=FEATURE_COLS)

    # Predict label.
    pred_label = _human_label(PREDICTOR.predict(X).iloc[0])

    # Predict probabilities (if available).
    try:
        proba = PREDICTOR.predict_proba(X)
        if isinstance(proba, pd.Series):
            # AutoGluon can return a Series for binary problems.
            proba = proba.to_frame().T
    except Exception:
        proba = None

    proba_dict = None
    if proba is not None:
        proba_dict = _normalize_proba_keys(proba.iloc[0].to_dict())

    if not proba_dict:
        # Probabilities missing entirely: fabricate 100% on predicted class for UX.
        proba_dict = {pred_label: 1.0, ("No" if pred_label == "Yes" else "Yes"): 0.0}
    elif len(proba_dict) == 1 and set(proba_dict) <= {"Yes", "No"}:
        # Positive-class-only Series: show the complementary class too.
        (only_label, p), = proba_dict.items()
        proba_dict["No" if only_label == "Yes" else "Yes"] = max(0.0, 1.0 - p)

    return proba_dict
# ----------------------------
# Gradio interface
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Pokémon Card → Collector's Item Predictor (Yes/No)")
    gr.Markdown(
        "Enter a card's details to predict whether it's a **collector's item**. "
        "This GUI mirrors the columns in the dataset "
        "[ecopus/pokemon_cards](https://huggingface.co/datasets/ecopus/pokemon_cards)."
    )

    # Default widget values, resolved from dataset metadata up front.
    default_card = META["card_examples"][0] if META["card_examples"] else "Charizard"
    default_set = META["card_sets"][0] if META["card_sets"] else None
    default_style = META["art_styles"][0] if META["art_styles"] else None
    default_cond = META["conditions"][0] if META["conditions"] else None

    # Row 1: free-text card name and the set it belongs to.
    with gr.Row():
        name_box = gr.Textbox(
            label="Card",
            value=default_card,
            placeholder="e.g., Charizard",
        )
        set_dd = gr.Dropdown(
            choices=META["card_sets"],
            value=default_set,
            label="Card Set",
            allow_custom_value=True,
        )

    # Row 2: release year plus categorical style/condition pickers.
    with gr.Row():
        year_slider = gr.Slider(
            minimum=int(META["year_min"]),
            maximum=int(META["year_max"]),
            step=1,
            value=min(2024, int(META["year_max"])),
            label="Year",
        )
        style_dd = gr.Dropdown(
            choices=META["art_styles"],
            value=default_style,
            label="Artwork Style",
            allow_custom_value=True,
        )
        cond_dd = gr.Dropdown(
            choices=META["conditions"],
            value=default_cond,
            label="Condition",
            allow_custom_value=True,
        )

    # Row 3: the two numeric features.
    with gr.Row():
        sne_slider = gr.Slider(
            minimum=float(META["sne_min"]),
            maximum=float(META["sne_max"]),
            step=0.001,
            value=0.536,
            label="Set Number Eq",
        )
        mv_number = gr.Number(
            value=round(min(100.00, float(META["mv_max"])), 2),
            precision=2,
            label="Market Value (USD)",
        )

    proba_pretty = gr.Label(num_top_classes=2, label="Class probabilities (Yes/No)")

    # Re-run the prediction whenever any input widget changes.
    input_widgets = [name_box, year_slider, set_dd, style_dd, cond_dd, sne_slider, mv_number]
    for widget in input_widgets:
        widget.change(fn=do_predict, inputs=input_widgets, outputs=[proba_pretty])

    # Representative examples from the dataset if available, else a few hand-crafted ones.
    example_rows = META["examples_rows"] or [
        ["Charizard", 1999, "Base Set", "Holo", "Near Mint", 0.85, 450.00],
        ["Pikachu", 2024, "Pokemon 151", "Full Art", "Near Mint", 1.05, 47.45],
        ["Ivysaur", 2025, "Pokemon 151", "Full Art", "Near Mint", 1.106, 30.77],
        ["Mew", 2024, "Pokemon 151", "Full Art Gold", "Mint", 1.242, 16.51],
        ["Spheal", 2014, "Evolutions", "Reverse Holo", "Lightly Played", 0.226, 0.12],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=input_widgets,
        label="Representative examples (from the dataset or sensible defaults)",
        examples_per_page=min(5, len(example_rows)),
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()