Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| This application loads a trained AutoGluon TabularPredictor that was built on the ecopus/pokemon_cards dataset and exposes it through a Gradio interface. Users can enter details of a Pokémon card—including its name, release year, set, artwork style, condition, set-number equivalent, and market value—and the model will instantly predict whether the card is considered a collector’s item (“Yes” or “No”). The interface also displays the model’s class probabilities so users can see how confident the model is about each prediction. | |
| Dataset reference: | |
| https://huggingface.co/datasets/ecopus/pokemon_cards | |
| """ | |
| # ---------------------------- | |
| # Imports | |
| # ---------------------------- | |
| import os | |
| import shutil | |
| import zipfile | |
| import pathlib | |
| from typing import Any, Dict, List, Optional | |
| import pandas as pd | |
| import gradio as gr | |
| import huggingface_hub | |
| import autogluon.tabular | |
| # Optional: pull choices/ranges from the dataset (falls back if unavailable) | |
| try: | |
| from datasets import load_dataset | |
| HAS_DATASETS = True | |
| except Exception: | |
| HAS_DATASETS = False | |
| # ---------------------------- | |
| # Settings: point to your trained AutoGluon predictor on the Hub | |
| # ---------------------------- | |
| MODEL_REPO_ID = "samder03/2025-24679-tabular-autolguon-predictor" # <- CHANGE ME | |
| ZIP_FILENAME = "autogluon_predictor_dir.zip" # <- CHANGE if different | |
| CACHE_DIR = pathlib.Path("hf_assets") | |
| EXTRACT_DIR = CACHE_DIR / "predictor_native" | |
| # Columns must match training-time names exactly: | |
| FEATURE_COLS = [ | |
| "Card", # string | |
| "Year", # int | |
| "Card Set", # string | |
| "Artwork Style", # string | |
| "Condition", # string | |
| "Set Number Eq", # float | |
| "Market Value", # float | |
| ] | |
| TARGET_COL = "Collector's Item" # binary: "Yes"/"No" in the dataset | |
| # ---------------------------- | |
| # Load predictor (download zip from Hub, then autogluon load) | |
| # ---------------------------- | |
| def _prepare_predictor_dir() -> str: | |
| CACHE_DIR.mkdir(parents=True, exist_ok=True) | |
| local_zip = huggingface_hub.hf_hub_download( | |
| repo_id=MODEL_REPO_ID, | |
| filename=ZIP_FILENAME, | |
| repo_type="model", | |
| local_dir=str(CACHE_DIR), | |
| local_dir_use_symlinks=False, | |
| ) | |
| if EXTRACT_DIR.exists(): | |
| shutil.rmtree(EXTRACT_DIR) | |
| EXTRACT_DIR.mkdir(parents=True, exist_ok=True) | |
| with zipfile.ZipFile(local_zip, "r") as zf: | |
| zf.extractall(str(EXTRACT_DIR)) | |
| contents = list(EXTRACT_DIR.iterdir()) | |
| predictor_root = contents[0] if (len(contents) == 1 and contents[0].is_dir()) else EXTRACT_DIR | |
| return str(predictor_root) | |
| # If loading locally instead of the Hub, comment these two lines and set: | |
| # PREDICTOR_DIR = "/path/to/AutogluonModels/ag-<run>" | |
| PREDICTOR_DIR = _prepare_predictor_dir() | |
| PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False) | |
| # ---------------------------- | |
| # Helpers | |
| # ---------------------------- | |
| OUTCOME_LABELS = { | |
| "Yes": "Yes", "No": "No", | |
| 1: "Yes", 0: "No", | |
| "1": "Yes", "0": "No", | |
| True: "Yes", False: "No", | |
| } | |
| def _human_label(x: Any) -> str: | |
| return OUTCOME_LABELS.get(x, str(x)) | |
| def _normalize_proba_keys(row_probs: Dict[Any, float]) -> Dict[str, float]: | |
| normalized: Dict[str, float] = {} | |
| for k, v in row_probs.items(): | |
| key = _human_label(k) | |
| normalized[key] = float(v) + float(normalized.get(key, 0.0)) | |
| # sort high->low | |
| return dict(sorted(normalized.items(), key=lambda kv: kv[1], reverse=True)) | |
| # ---------------------------- | |
| # Dataset-driven choices/ranges (with safe fallbacks if offline) | |
| # ---------------------------- | |
| def get_dataset_metadata() -> dict: | |
| """ | |
| Try to pull unique choices and numeric ranges from ecopus/pokemon_cards. | |
| Falls back to hard-coded sensible defaults if the dataset lib or network is unavailable. | |
| """ | |
| meta = { | |
| "card_examples": ["Charizard", "Pikachu", "Mew", "Ivysaur"], | |
| "card_sets": [ | |
| "Base Set", "Pokemon 151", "Evolutions", "Prismatic Evolutions", | |
| "Journey Together", "Destined Rivals", "Stellar Crown", "BREAKpoint", | |
| "EX Sandstorm", "Double Crisis", "McDonalds" | |
| ], | |
| "art_styles": [ | |
| "Standard", "Holo", "Reverse Holo", "Full Art", | |
| "Full Art Gold", "Full Art Rainbow", "Alternate Art", "Trainer Gallery", "Promo", | |
| # include obvious typo seen in a sample row to avoid surprises: | |
| "Standart" | |
| ], | |
| "conditions": ["Mint", "Near Mint", "Lightly Played", "Heavily Played"], | |
| "year_min": 1995, | |
| "year_max": 2025, | |
| "sne_min": 0.04, | |
| "sne_max": 1.50, | |
| "mv_min": 0.08, | |
| "mv_max": 133.00, | |
| "examples_rows": [], # list of example rows matching FEATURE_COLS order | |
| } | |
| if not HAS_DATASETS: | |
| return meta | |
| try: | |
| ds = load_dataset("ecopus/pokemon_cards") | |
| # Merge splits if present | |
| split_names = [k for k in ds.keys()] | |
| frames: List[pd.DataFrame] = [] | |
| for sn in split_names: | |
| frames.append(pd.DataFrame(ds[sn])) | |
| df_all = pd.concat(frames, ignore_index=True) | |
| # Coerce types safely (in case commas exist in displayed values) | |
| def _to_int(x): | |
| try: | |
| return int(str(x).replace(",", "")) | |
| except Exception: | |
| return None | |
| def _to_float(x): | |
| try: | |
| return float(str(x).replace(",", "")) | |
| except Exception: | |
| return None | |
| # Compute unique choices | |
| if "Card Set" in df_all.columns: | |
| sets = sorted({str(s) for s in df_all["Card Set"].dropna().unique().tolist()}) | |
| if sets: | |
| meta["card_sets"] = sets | |
| if "Artwork Style" in df_all.columns: | |
| styles = sorted({str(s) for s in df_all["Artwork Style"].dropna().unique().tolist()}) | |
| if styles: | |
| # include 'Standart' if present | |
| meta["art_styles"] = styles | |
| if "Condition" in df_all.columns: | |
| conds = sorted({str(s) for s in df_all["Condition"].dropna().unique().tolist()}) | |
| if conds: | |
| meta["conditions"] = conds | |
| # Ranges | |
| if "Year" in df_all.columns: | |
| years = [y for y in df_all["Year"].map(_to_int).dropna().tolist()] | |
| if years: | |
| meta["year_min"] = min(years) | |
| meta["year_max"] = max(years) | |
| if "Set Number Eq" in df_all.columns: | |
| sne = [s for s in df_all["Set Number Eq"].map(_to_float).dropna().tolist()] | |
| if sne: | |
| meta["sne_min"] = float(min(sne)) | |
| meta["sne_max"] = float(max(sne)) | |
| if "Market Value" in df_all.columns: | |
| mv = [m for m in df_all["Market Value"].map(_to_float).dropna().tolist()] | |
| if mv: | |
| meta["mv_min"] = float(min(mv)) | |
| meta["mv_max"] = float(max(mv)) | |
| # Example rows (grab up to 5 reasonable examples) | |
| cols_ok = all(c in df_all.columns for c in FEATURE_COLS) | |
| if cols_ok: | |
| sample = df_all[FEATURE_COLS].dropna().head(5) | |
| meta["examples_rows"] = sample.values.tolist() | |
| # Some card names to seed the textbox suggestions | |
| if "Card" in df_all.columns: | |
| meta["card_examples"] = df_all["Card"].dropna().astype(str).head(8).tolist() | |
| except Exception: | |
| pass | |
| return meta | |
| META = get_dataset_metadata() | |
| # ---------------------------- | |
| # Prediction function | |
| # ---------------------------- | |
| def do_predict(card_name: str, | |
| year: float, | |
| card_set: str, | |
| artwork_style: str, | |
| condition: str, | |
| set_number_eq: float, | |
| market_value: float): | |
| # Build a single-row DataFrame exactly matching training columns | |
| row = { | |
| "Card": str(card_name).strip(), | |
| "Year": int(year), | |
| "Card Set": str(card_set).strip(), | |
| "Artwork Style": str(artwork_style).strip(), | |
| "Condition": str(condition).strip(), | |
| "Set Number Eq": float(set_number_eq), | |
| "Market Value": float(market_value), | |
| } | |
| X = pd.DataFrame([row], columns=FEATURE_COLS) | |
| # Predict label | |
| pred_series = PREDICTOR.predict(X) | |
| raw_pred = pred_series.iloc[0] | |
| pred_label = _human_label(raw_pred) | |
| # Predict probabilities (if available) | |
| try: | |
| proba = PREDICTOR.predict_proba(X) | |
| if isinstance(proba, pd.Series): # AutoGluon can return Series for binary | |
| proba = proba.to_frame().T | |
| except Exception: | |
| proba = None | |
| proba_dict = None | |
| if proba is not None: | |
| row0 = proba.iloc[0].to_dict() | |
| proba_dict = _normalize_proba_keys(row0) | |
| # If probabilities missing, fabricate 100% on predicted class for UX | |
| if not proba_dict: | |
| proba_dict = {pred_label: 1.0, ("No" if pred_label == "Yes" else "Yes"): 0.0} | |
| return proba_dict | |
| # ---------------------------- | |
| # Build Gradio UI | |
| # ---------------------------- | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# Pokémon Card → Collector's Item Predictor (Yes/No)") | |
| gr.Markdown( | |
| "Enter a card's details to predict whether it's a **collector's item**. " | |
| "This GUI mirrors the columns in the dataset " | |
| "[ecopus/pokemon_cards](https://huggingface.co/datasets/ecopus/pokemon_cards)." | |
| ) | |
| with gr.Row(): | |
| card_name = gr.Textbox( | |
| label="Card", | |
| value=(META["card_examples"][0] if META["card_examples"] else "Charizard"), | |
| placeholder="e.g., Charizard" | |
| ) | |
| card_set = gr.Dropdown( | |
| choices=META["card_sets"], | |
| value=(META["card_sets"][0] if META["card_sets"] else None), | |
| label="Card Set", | |
| allow_custom_value=True, | |
| ) | |
| with gr.Row(): | |
| year = gr.Slider( | |
| minimum=int(META["year_min"]), | |
| maximum=int(META["year_max"]), | |
| step=1, | |
| value=min(2024, int(META["year_max"])), | |
| label="Year" | |
| ) | |
| artwork_style = gr.Dropdown( | |
| choices=META["art_styles"], | |
| value=(META["art_styles"][0] if META["art_styles"] else None), | |
| label="Artwork Style", | |
| allow_custom_value=True, | |
| ) | |
| condition = gr.Dropdown( | |
| choices=META["conditions"], | |
| value=(META["conditions"][0] if META["conditions"] else None), | |
| label="Condition", | |
| allow_custom_value=True, | |
| ) | |
| with gr.Row(): | |
| set_number_eq = gr.Slider( | |
| minimum=float(META["sne_min"]), | |
| maximum=float(META["sne_max"]), | |
| step=0.001, | |
| value=0.536, | |
| label="Set Number Eq" | |
| ) | |
| market_value = gr.Number( | |
| value=round(min(100.00, float(META["mv_max"])), 2), | |
| precision=2, | |
| label="Market Value (USD)" | |
| ) | |
| proba_pretty = gr.Label(num_top_classes=2, label="Class probabilities (Yes/No)") | |
| inputs = [card_name, year, card_set, artwork_style, condition, set_number_eq, market_value] | |
| for comp in inputs: | |
| comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty]) | |
| # Representative examples from the dataset if available, else a few hand-crafted ones | |
| examples = META["examples_rows"] if META["examples_rows"] else [ | |
| ["Charizard", 1999, "Base Set", "Holo", "Near Mint", 0.85, 450.00], | |
| ["Pikachu", 2024, "Pokemon 151", "Full Art", "Near Mint", 1.05, 47.45], | |
| ["Ivysaur", 2025, "Pokemon 151", "Full Art", "Near Mint", 1.106, 30.77], | |
| ["Mew", 2024, "Pokemon 151", "Full Art Gold", "Mint", 1.242, 16.51], | |
| ["Spheal", 2014, "Evolutions", "Reverse Holo", "Lightly Played", 0.226, 0.12], | |
| ] | |
| gr.Examples( | |
| examples=examples, | |
| inputs=inputs, | |
| label="Representative examples (from the dataset or sensible defaults)", | |
| examples_per_page=min(5, len(examples)), | |
| cache_examples=False, | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |