Spaces:

scottymcgee
/

pokemon

Sleeping

App Files Files Community

scottymcgee committed on Sep 24, 2025

Commit

7b45003

verified ·

1 Parent(s): cacc4d0

Create app.py

Browse files

This application loads a trained AutoGluon TabularPredictor that was built on the ecopus/pokemon_cards dataset and exposes it through a Gradio interface. Users can enter details of a Pokémon card—including its name, release year, set, artwork style, condition, set-number equivalent, and market value—and the model will instantly predict whether the card is considered a collector’s item (“Yes” or “No”). The interface also displays the model’s class probabilities so users can see how confident the model is about each prediction.

Files changed (1) hide show

app.py +345 -0

app.py ADDED Viewed

	@@ -0,0 +1,345 @@

+# -*- coding: utf-8 -*-
+"""
+This application loads a trained AutoGluon TabularPredictor that was built on the ecopus/pokemon_cards dataset and exposes it through a Gradio interface. Users can enter details of a Pokémon card—including its name, release year, set, artwork style, condition, set-number equivalent, and market value—and the model will instantly predict whether the card is considered a collector’s item (“Yes” or “No”). The interface also displays the model’s class probabilities so users can see how confident the model is about each prediction.
+Dataset reference:
+  https://huggingface.co/datasets/ecopus/pokemon_cards
+"""
+# ----------------------------
+# Imports
+# ----------------------------
+import os
+import shutil
+import zipfile
+import pathlib
+from typing import Any, Dict, List, Optional
+import pandas as pd
+import gradio as gr
+import huggingface_hub
+import autogluon.tabular
+# Optional: pull choices/ranges from the dataset (falls back if unavailable)
+try:
+    from datasets import load_dataset
+    HAS_DATASETS = True
+except Exception:
+    HAS_DATASETS = False
+# ----------------------------
+# Settings: point to your trained AutoGluon predictor on the Hub
+# ----------------------------
+# NOTE(review): MODEL_REPO_ID is still the template placeholder. It is passed
+# to hf_hub_download inside _prepare_predictor_dir, which runs at import time,
+# so the download presumably fails until a real model repo id is set here.
+MODEL_REPO_ID = "your-username/your-autogluon-predictor-repo"  # <- CHANGE ME
+ZIP_FILENAME  = "autogluon_predictor_dir.zip"                  # <- CHANGE if different
+# CACHE_DIR receives the downloaded zip; EXTRACT_DIR is the sub-folder the
+# archive is unpacked into (it is deleted and recreated on every start).
+CACHE_DIR   = pathlib.Path("hf_assets")
+EXTRACT_DIR = CACHE_DIR / "predictor_native"
+# Columns must match training-time names exactly:
+# (the list order is also the column order of the DataFrame built in
+# do_predict and of the example rows shown in the UI)
+FEATURE_COLS = [
+    "Card",           # string
+    "Year",           # int
+    "Card Set",       # string
+    "Artwork Style",  # string
+    "Condition",      # string
+    "Set Number Eq",  # float
+    "Market Value",   # float
+]
+TARGET_COL = "Collector's Item"  # binary: "Yes"/"No" in the dataset
+# ----------------------------
+# Load predictor (download zip from Hub, then autogluon load)
+# ----------------------------
+def _prepare_predictor_dir() -> str:
+    CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    local_zip = huggingface_hub.hf_hub_download(
+        repo_id=MODEL_REPO_ID,
+        filename=ZIP_FILENAME,
+        repo_type="model",
+        local_dir=str(CACHE_DIR),
+        local_dir_use_symlinks=False,
+    )
+    if EXTRACT_DIR.exists():
+        shutil.rmtree(EXTRACT_DIR)
+    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
+    with zipfile.ZipFile(local_zip, "r") as zf:
+        zf.extractall(str(EXTRACT_DIR))
+    contents = list(EXTRACT_DIR.iterdir())
+    predictor_root = contents[0] if (len(contents) == 1 and contents[0].is_dir()) else EXTRACT_DIR
+    return str(predictor_root)
+# If loading locally instead of the Hub, comment these two lines and set:
+# PREDICTOR_DIR = "/path/to/AutogluonModels/ag-<run>"
+# Both statements execute at import time: the first downloads and unpacks the
+# archive, the second loads the predictor object that do_predict uses.
+PREDICTOR_DIR = _prepare_predictor_dir()
+# NOTE(review): require_py_version_match=False presumably lets the load proceed
+# when the runtime Python version differs from the training one — confirm
+# against the AutoGluon documentation.
+PREDICTOR = autogluon.tabular.TabularPredictor.load(PREDICTOR_DIR, require_py_version_match=False)
+# ----------------------------
+# Helpers
+# ----------------------------
+# Maps several raw spellings of the binary label (strings, ints, numeric
+# strings, bools) onto the display labels "Yes" / "No".
+# NOTE: in Python True == 1 and False == 0 with identical hashes, so the bool
+# entries on the last row occupy the same dict slots as the int entries.
+OUTCOME_LABELS = {
+    "Yes": "Yes", "No": "No",
+    1: "Yes", 0: "No",
+    "1": "Yes", "0": "No",
+    True: "Yes", False: "No",
+}
+def _human_label(x: Any) -> str:
+    return OUTCOME_LABELS.get(x, str(x))
+def _normalize_proba_keys(row_probs: Dict[Any, float]) -> Dict[str, float]:
+    normalized: Dict[str, float] = {}
+    for k, v in row_probs.items():
+        key = _human_label(k)
+        normalized[key] = float(v) + float(normalized.get(key, 0.0))
+    # sort high->low
+    return dict(sorted(normalized.items(), key=lambda kv: kv[1], reverse=True))
+# ----------------------------
+# Dataset-driven choices/ranges (with safe fallbacks if offline)
+# ----------------------------
+def get_dataset_metadata() -> dict:
+    """
+    Try to pull unique choices and numeric ranges from ecopus/pokemon_cards.
+    Falls back to hard-coded sensible defaults if the dataset lib or network is unavailable.
+    """
+    meta = {
+        "card_examples": ["Charizard", "Pikachu", "Mew", "Ivysaur"],
+        "card_sets": [
+            "Base Set", "Pokemon 151", "Evolutions", "Prismatic Evolutions",
+            "Journey Together", "Destined Rivals", "Stellar Crown", "BREAKpoint",
+            "EX Sandstorm", "Double Crisis", "McDonalds"
+        ],
+        "art_styles": [
+            "Standard", "Holo", "Reverse Holo", "Full Art",
+            "Full Art Gold", "Full Art Rainbow", "Alternate Art", "Trainer Gallery", "Promo",
+            # include obvious typo seen in a sample row to avoid surprises:
+            "Standart"
+        ],
+        "conditions": ["Mint", "Near Mint", "Lightly Played", "Heavily Played"],
+        "year_min": 1995,
+        "year_max": 2025,
+        "sne_min": 0.04,
+        "sne_max": 1.50,
+        "mv_min": 0.08,
+        "mv_max": 133.00,
+        "examples_rows": [],  # list of example rows matching FEATURE_COLS order
+    }
+    if not HAS_DATASETS:
+        return meta
+    try:
+        ds = load_dataset("ecopus/pokemon_cards")
+        # Merge splits if present
+        split_names = [k for k in ds.keys()]
+        frames: List[pd.DataFrame] = []
+        for sn in split_names:
+            frames.append(pd.DataFrame(ds[sn]))
+        df_all = pd.concat(frames, ignore_index=True)
+        # Coerce types safely (in case commas exist in displayed values)
+        def _to_int(x):
+            try:
+                return int(str(x).replace(",", ""))
+            except Exception:
+                return None
+        def _to_float(x):
+            try:
+                return float(str(x).replace(",", ""))
+            except Exception:
+                return None
+        # Compute unique choices
+        if "Card Set" in df_all.columns:
+            sets = sorted({str(s) for s in df_all["Card Set"].dropna().unique().tolist()})
+            if sets:
+                meta["card_sets"] = sets
+        if "Artwork Style" in df_all.columns:
+            styles = sorted({str(s) for s in df_all["Artwork Style"].dropna().unique().tolist()})
+            if styles:
+                # include 'Standart' if present
+                meta["art_styles"] = styles
+        if "Condition" in df_all.columns:
+            conds = sorted({str(s) for s in df_all["Condition"].dropna().unique().tolist()})
+            if conds:
+                meta["conditions"] = conds
+        # Ranges
+        if "Year" in df_all.columns:
+            years = [y for y in df_all["Year"].map(_to_int).dropna().tolist()]
+            if years:
+                meta["year_min"] = min(years)
+                meta["year_max"] = max(years)
+        if "Set Number Eq" in df_all.columns:
+            sne = [s for s in df_all["Set Number Eq"].map(_to_float).dropna().tolist()]
+            if sne:
+                meta["sne_min"] = float(min(sne))
+                meta["sne_max"] = float(max(sne))
+        if "Market Value" in df_all.columns:
+            mv = [m for m in df_all["Market Value"].map(_to_float).dropna().tolist()]
+            if mv:
+                meta["mv_min"] = float(min(mv))
+                meta["mv_max"] = float(max(mv))
+        # Example rows (grab up to 5 reasonable examples)
+        cols_ok = all(c in df_all.columns for c in FEATURE_COLS)
+        if cols_ok:
+            sample = df_all[FEATURE_COLS].dropna().head(5)
+            meta["examples_rows"] = sample.values.tolist()
+        # Some card names to seed the textbox suggestions
+        if "Card" in df_all.columns:
+            meta["card_examples"] = df_all["Card"].dropna().astype(str).head(8).tolist()
+    except Exception:
+        pass
+    return meta
+# Evaluated once at import time; the UI components defined further down read
+# their choices, ranges and example rows from this dict.
+META = get_dataset_metadata()
+# ----------------------------
+# Prediction function
+# ----------------------------
+def do_predict(card_name: str,
+               year: float,
+               card_set: str,
+               artwork_style: str,
+               condition: str,
+               set_number_eq: float,
+               market_value: float):
+    # Build a single-row DataFrame exactly matching training columns
+    row = {
+        "Card": str(card_name).strip(),
+        "Year": int(year),
+        "Card Set": str(card_set).strip(),
+        "Artwork Style": str(artwork_style).strip(),
+        "Condition": str(condition).strip(),
+        "Set Number Eq": float(set_number_eq),
+        "Market Value": float(market_value),
+    }
+    X = pd.DataFrame([row], columns=FEATURE_COLS)
+    # Predict label
+    pred_series = PREDICTOR.predict(X)
+    raw_pred = pred_series.iloc[0]
+    pred_label = _human_label(raw_pred)
+    # Predict probabilities (if available)
+    try:
+        proba = PREDICTOR.predict_proba(X)
+        if isinstance(proba, pd.Series):  # AutoGluon can return Series for binary
+            proba = proba.to_frame().T
+    except Exception:
+        proba = None
+    proba_dict = None
+    if proba is not None:
+        row0 = proba.iloc[0].to_dict()
+        proba_dict = _normalize_proba_keys(row0)
+    # If probabilities missing, fabricate 100% on predicted class for UX
+    if not proba_dict:
+        proba_dict = {pred_label: 1.0, ("No" if pred_label == "Yes" else "Yes"): 0.0}
+    return proba_dict
+# ----------------------------
+# Build Gradio UI
+# ----------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("# Pokémon Card → Collector's Item Predictor (Yes/No)")
+    gr.Markdown(
+        "Enter a card's details to predict whether it's a **collector's item**. "
+        "This GUI mirrors the columns in the dataset "
+        "[ecopus/pokemon_cards](https://huggingface.co/datasets/ecopus/pokemon_cards)."
+    )
+    with gr.Row():
+        card_name = gr.Textbox(
+            label="Card",
+            value=(META["card_examples"][0] if META["card_examples"] else "Charizard"),
+            placeholder="e.g., Charizard"
+        )
+        card_set = gr.Dropdown(
+            choices=META["card_sets"],
+            value=(META["card_sets"][0] if META["card_sets"] else None),
+            label="Card Set",
+            allow_custom_value=True,
+        )
+    with gr.Row():
+        year = gr.Slider(
+            minimum=int(META["year_min"]),
+            maximum=int(META["year_max"]),
+            step=1,
+            value=min(2024, int(META["year_max"])),
+            label="Year"
+        )
+        artwork_style = gr.Dropdown(
+            choices=META["art_styles"],
+            value=(META["art_styles"][0] if META["art_styles"] else None),
+            label="Artwork Style",
+            allow_custom_value=True,
+        )
+        condition = gr.Dropdown(
+            choices=META["conditions"],
+            value=(META["conditions"][0] if META["conditions"] else None),
+            label="Condition",
+            allow_custom_value=True,
+        )
+    with gr.Row():
+        set_number_eq = gr.Slider(
+            minimum=float(META["sne_min"]),
+            maximum=float(META["sne_max"]),
+            step=0.001,
+            value=0.536,
+            label="Set Number Eq"
+        )
+        market_value = gr.Number(
+            value=round(min(100.00, float(META["mv_max"])), 2),
+            precision=2,
+            label="Market Value (USD)"
+        )
+    proba_pretty = gr.Label(num_top_classes=2, label="Class probabilities (Yes/No)")
+    inputs = [card_name, year, card_set, artwork_style, condition, set_number_eq, market_value]
+    for comp in inputs:
+        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])
+    # Representative examples from the dataset if available, else a few hand-crafted ones
+    examples = META["examples_rows"] if META["examples_rows"] else [
+        ["Charizard", 1999, "Base Set", "Holo", "Near Mint", 0.85, 450.00],
+        ["Pikachu", 2024, "Pokemon 151", "Full Art", "Near Mint", 1.05, 47.45],
+        ["Ivysaur", 2025, "Pokemon 151", "Full Art", "Near Mint", 1.106, 30.77],
+        ["Mew", 2024, "Pokemon 151", "Full Art Gold", "Mint", 1.242, 16.51],
+        ["Spheal", 2014, "Evolutions", "Reverse Holo", "Lightly Played", 0.226, 0.12],
+    ]
+    gr.Examples(
+        examples=examples,
+        inputs=inputs,
+        label="Representative examples (from the dataset or sensible defaults)",
+        examples_per_page=min(5, len(examples)),
+        cache_examples=False,
+    )
+if __name__ == "__main__":
+    demo.launch()