scottymcgee committed on
Commit
7b45003
·
verified ·
1 Parent(s): cacc4d0

Create app.py

Browse files

This application loads a trained AutoGluon TabularPredictor that was built on the ecopus/pokemon_cards dataset and exposes it through a Gradio interface. Users can enter details of a Pokémon card—including its name, release year, set, artwork style, condition, set-number equivalent, and market value—and the model will instantly predict whether the card is considered a collector’s item (“Yes” or “No”). The interface also displays the model’s class probabilities so users can see how confident the model is about each prediction.

Files changed (1) hide show
  1. app.py +345 -0
app.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ This application loads a trained AutoGluon TabularPredictor that was built on the ecopus/pokemon_cards dataset and exposes it through a Gradio interface. Users can enter details of a Pokémon card—including its name, release year, set, artwork style, condition, set-number equivalent, and market value—and the model will instantly predict whether the card is considered a collector’s item (“Yes” or “No”). The interface also displays the model’s class probabilities so users can see how confident the model is about each prediction.
4
+
5
+ Dataset reference:
6
+ https://huggingface.co/datasets/ecopus/pokemon_cards
7
+ """
8
+
9
+ # ----------------------------
10
+ # Imports
11
+ # ----------------------------
12
+ import os
13
+ import shutil
14
+ import zipfile
15
+ import pathlib
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ import pandas as pd
19
+ import gradio as gr
20
+ import huggingface_hub
21
+ import autogluon.tabular
22
+
23
+ # Optional: pull choices/ranges from the dataset (falls back if unavailable)
24
+ try:
25
+ from datasets import load_dataset
26
+ HAS_DATASETS = True
27
+ except Exception:
28
+ HAS_DATASETS = False
29
+
30
+
31
+ # ----------------------------
32
+ # Settings: point to your trained AutoGluon predictor on the Hub
33
+ # ----------------------------
34
# Hub location of the zipped AutoGluon predictor. Replace the placeholder
# repo id with your own; adjust the archive name if yours differs.
MODEL_REPO_ID = "your-username/your-autogluon-predictor-repo"
ZIP_FILENAME = "autogluon_predictor_dir.zip"

# Local working directories: downloads land in CACHE_DIR and the archive is
# unpacked beneath it.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

# Feature columns, in the order the app builds its input frame. Names must
# match the training-time column names exactly.
#   strings: Card, Card Set, Artwork Style, Condition
#   int:     Year
#   floats:  Set Number Eq, Market Value
FEATURE_COLS = [
    "Card",
    "Year",
    "Card Set",
    "Artwork Style",
    "Condition",
    "Set Number Eq",
    "Market Value",
]

# Binary target column ("Yes"/"No" in the dataset).
TARGET_COL = "Collector's Item"
51
+
52
+
53
+ # ----------------------------
54
+ # Load predictor (download zip from Hub, then autogluon load)
55
+ # ----------------------------
56
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor from the Hub and unpack it locally.

    The archive named by ZIP_FILENAME is fetched from MODEL_REPO_ID into
    CACHE_DIR and extracted into a freshly created EXTRACT_DIR.

    Returns:
        Path of the directory to hand to ``TabularPredictor.load``. If the
        archive unpacks to exactly one real top-level folder, that folder is
        returned; otherwise EXTRACT_DIR itself is returned.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
        local_dir_use_symlinks=False,
    )

    # Always extract into an empty directory so files left over from a
    # previous archive cannot mix with the current one.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # Ignore archiver residue (macOS "__MACOSX" folder, dotfiles such as
    # ".DS_Store") when deciding whether the zip wrapped the predictor in a
    # single top-level folder; otherwise that residue makes the check fail
    # and the wrong directory is returned.
    entries = [
        entry
        for entry in EXTRACT_DIR.iterdir()
        if entry.name != "__MACOSX" and not entry.name.startswith(".")
    ]
    if len(entries) == 1 and entries[0].is_dir():
        return str(entries[0])
    return str(EXTRACT_DIR)
74
+
75
# To load a predictor from local disk instead of the Hub, replace the
# PREDICTOR_DIR assignment below with a path such as:
#   PREDICTOR_DIR = "/path/to/AutogluonModels/ag-<run>"
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    require_py_version_match=False,
)
79
+
80
+
81
+ # ----------------------------
82
+ # Helpers
83
+ # ----------------------------
84
+ OUTCOME_LABELS = {
85
+ "Yes": "Yes", "No": "No",
86
+ 1: "Yes", 0: "No",
87
+ "1": "Yes", "0": "No",
88
+ True: "Yes", False: "No",
89
+ }
90
+
91
+ def _human_label(x: Any) -> str:
92
+ return OUTCOME_LABELS.get(x, str(x))
93
+
94
def _normalize_proba_keys(row_probs: Dict[Any, float]) -> Dict[str, float]:
    """Re-key one row of class probabilities by display label.

    Raw keys are translated with ``_human_label``; probabilities of keys that
    translate to the same label are summed. The returned dict is ordered from
    the highest probability to the lowest.
    """
    totals: Dict[str, float] = {}
    for raw_key, prob in row_probs.items():
        label = _human_label(raw_key)
        previous = totals.get(label, 0.0)
        totals[label] = float(prob) + float(previous)

    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)
101
+
102
+
103
+ # ----------------------------
104
+ # Dataset-driven choices/ranges (with safe fallbacks if offline)
105
+ # ----------------------------
106
def get_dataset_metadata() -> dict:
    """Collect dropdown choices, numeric ranges and example rows for the UI.

    Starts from hard-coded defaults and, when the ``datasets`` library is
    available, overrides them with values computed from every split of
    ``ecopus/pokemon_cards``. Loading is best-effort: on any failure the
    problem is logged and whatever has been collected so far (defaults for
    the rest) is returned.

    Returns:
        Dict with keys ``card_examples``, ``card_sets``, ``art_styles``,
        ``conditions``, ``year_min``, ``year_max``, ``sne_min``, ``sne_max``,
        ``mv_min``, ``mv_max`` and ``examples_rows`` (rows ordered like
        FEATURE_COLS).
    """
    # Local imports keep this block self-contained; both are stdlib.
    import logging
    import math

    meta = {
        "card_examples": ["Charizard", "Pikachu", "Mew", "Ivysaur"],
        "card_sets": [
            "Base Set", "Pokemon 151", "Evolutions", "Prismatic Evolutions",
            "Journey Together", "Destined Rivals", "Stellar Crown", "BREAKpoint",
            "EX Sandstorm", "Double Crisis", "McDonalds"
        ],
        "art_styles": [
            "Standard", "Holo", "Reverse Holo", "Full Art",
            "Full Art Gold", "Full Art Rainbow", "Alternate Art", "Trainer Gallery", "Promo",
            # include obvious typo seen in a sample row to avoid surprises:
            "Standart"
        ],
        "conditions": ["Mint", "Near Mint", "Lightly Played", "Heavily Played"],
        "year_min": 1995,
        "year_max": 2025,
        "sne_min": 0.04,
        "sne_max": 1.50,
        "mv_min": 0.08,
        "mv_max": 133.00,
        "examples_rows": [],  # list of example rows matching FEATURE_COLS order
    }

    if not HAS_DATASETS:
        return meta

    def _to_float(x):
        """Parse *x* as a finite float (thousands commas allowed), else None."""
        try:
            number = float(str(x).replace(",", ""))
        except (TypeError, ValueError):
            return None
        # NaN/inf must never become a slider bound.
        return number if math.isfinite(number) else None

    def _to_int(x):
        """Parse *x* as an int, accepting float-formatted text like "1999.0"."""
        text = str(x).replace(",", "").strip()
        try:
            return int(text)
        except ValueError:
            pass
        # A float-typed column renders years as "1999.0", which int() rejects.
        number = _to_float(text)
        return None if number is None else int(number)

    def _parsed(column, parser):
        """Return the successfully parsed values of one column."""
        candidates = (parser(value) for value in df_all[column])
        return [value for value in candidates if value is not None]

    try:
        ds = load_dataset("ecopus/pokemon_cards")
        # Merge every split into one frame.
        df_all = pd.concat(
            [pd.DataFrame(ds[split_name]) for split_name in ds.keys()],
            ignore_index=True,
        )

        # Unique, sorted choices for the categorical dropdowns.
        choice_columns = (
            ("Card Set", "card_sets"),
            ("Artwork Style", "art_styles"),
            ("Condition", "conditions"),
        )
        for column, meta_key in choice_columns:
            if column not in df_all.columns:
                continue
            choices = sorted({str(v) for v in df_all[column].dropna().unique().tolist()})
            if choices:
                meta[meta_key] = choices

        # Numeric ranges.
        if "Year" in df_all.columns:
            years = _parsed("Year", _to_int)
            if years:
                meta["year_min"] = min(years)
                meta["year_max"] = max(years)

        if "Set Number Eq" in df_all.columns:
            sne = _parsed("Set Number Eq", _to_float)
            if sne:
                meta["sne_min"] = float(min(sne))
                meta["sne_max"] = float(max(sne))

        if "Market Value" in df_all.columns:
            mv = _parsed("Market Value", _to_float)
            if mv:
                meta["mv_min"] = float(min(mv))
                meta["mv_max"] = float(max(mv))

        # Example rows (up to 5 complete rows, in FEATURE_COLS order).
        if all(c in df_all.columns for c in FEATURE_COLS):
            sample = df_all[FEATURE_COLS].dropna().head(5)
            meta["examples_rows"] = sample.values.tolist()

        # Some card names to seed the textbox.
        if "Card" in df_all.columns:
            meta["card_examples"] = df_all["Card"].dropna().astype(str).head(8).tolist()

    except Exception as exc:
        # Deliberately best-effort (offline, dataset moved, schema changed):
        # keep the app usable, but record why the fallback values are in use.
        logging.getLogger(__name__).warning(
            "Could not read metadata from ecopus/pokemon_cards; using fallback values: %s",
            exc,
        )

    return meta
209
+
210
+
211
+ META = get_dataset_metadata()
212
+
213
+
214
+ # ----------------------------
215
+ # Prediction function
216
+ # ----------------------------
217
def do_predict(card_name: str,
               year: float,
               card_set: str,
               artwork_style: str,
               condition: str,
               set_number_eq: float,
               market_value: float):
    """Predict the collector's-item class probabilities for one card.

    Args:
        card_name: Card name.
        year: Release year; converted to int.
        card_set: Card set name.
        artwork_style: Artwork style.
        condition: Card condition.
        set_number_eq: "Set Number Eq" feature value.
        market_value: Market value.

    Returns:
        Dict mapping display label to probability, highest first. If
        probabilities cannot be obtained, the predicted label gets 1.0 and
        the opposite label 0.0.

    Raises:
        gr.Error: If a numeric input is empty.
    """
    import logging  # stdlib; local import keeps the block self-contained

    # A cleared numeric field arrives as None; report it clearly instead of
    # letting int()/float() fail with a bare TypeError.
    numeric_inputs = {
        "Year": year,
        "Set Number Eq": set_number_eq,
        "Market Value": market_value,
    }
    missing = [name for name, value in numeric_inputs.items() if value is None]
    if missing:
        raise gr.Error("Please enter a value for: " + ", ".join(missing))

    # Build a single-row DataFrame exactly matching training columns
    row = {
        "Card": str(card_name).strip(),
        "Year": int(year),
        "Card Set": str(card_set).strip(),
        "Artwork Style": str(artwork_style).strip(),
        "Condition": str(condition).strip(),
        "Set Number Eq": float(set_number_eq),
        "Market Value": float(market_value),
    }
    X = pd.DataFrame([row], columns=FEATURE_COLS)

    # Ask explicitly for the one-column-per-class frame. A Series result would
    # be indexed by row number, and transposing it would mislabel the
    # positive-class probability under the key 0 ("No").
    proba_dict = None
    try:
        proba = PREDICTOR.predict_proba(X, as_multiclass=True)
        proba_dict = _normalize_proba_keys(proba.iloc[0].to_dict())
    except Exception as exc:
        # Best-effort: fall back to the hard label below.
        logging.getLogger(__name__).warning("predict_proba failed: %s", exc)
        proba_dict = None

    # If probabilities missing, fabricate 100% on predicted class for UX.
    # The label is only needed here, so the second inference pass is skipped
    # whenever probabilities are available.
    if not proba_dict:
        pred_label = _human_label(PREDICTOR.predict(X).iloc[0])
        proba_dict = {pred_label: 1.0, ("No" if pred_label == "Yes" else "Yes"): 0.0}

    return proba_dict
260
+
261
+
262
+ # ----------------------------
263
+ # Build Gradio UI
264
+ # ----------------------------
265
# Slider bounds come from META (dataset-derived or fallback). The preferred
# starting values are clamped into those bounds so a dataset with an
# unexpected range can never produce a default outside its own slider.
_YEAR_LO = int(META["year_min"])
_YEAR_HI = int(META["year_max"])
_YEAR_DEFAULT = max(_YEAR_LO, min(2024, _YEAR_HI))

_SNE_LO = float(META["sne_min"])
_SNE_HI = float(META["sne_max"])
_SNE_DEFAULT = max(_SNE_LO, min(0.536, _SNE_HI))

with gr.Blocks() as demo:
    gr.Markdown("# Pokémon Card → Collector's Item Predictor (Yes/No)")
    gr.Markdown(
        "Enter a card's details to predict whether it's a **collector's item**. "
        "This GUI mirrors the columns in the dataset "
        "[ecopus/pokemon_cards](https://huggingface.co/datasets/ecopus/pokemon_cards)."
    )

    with gr.Row():
        card_name = gr.Textbox(
            label="Card",
            value=(META["card_examples"][0] if META["card_examples"] else "Charizard"),
            placeholder="e.g., Charizard"
        )
        card_set = gr.Dropdown(
            choices=META["card_sets"],
            value=(META["card_sets"][0] if META["card_sets"] else None),
            label="Card Set",
            allow_custom_value=True,
        )

    with gr.Row():
        year = gr.Slider(
            minimum=_YEAR_LO,
            maximum=_YEAR_HI,
            step=1,
            value=_YEAR_DEFAULT,
            label="Year"
        )
        artwork_style = gr.Dropdown(
            choices=META["art_styles"],
            value=(META["art_styles"][0] if META["art_styles"] else None),
            label="Artwork Style",
            allow_custom_value=True,
        )
        condition = gr.Dropdown(
            choices=META["conditions"],
            value=(META["conditions"][0] if META["conditions"] else None),
            label="Condition",
            allow_custom_value=True,
        )

    with gr.Row():
        set_number_eq = gr.Slider(
            minimum=_SNE_LO,
            maximum=_SNE_HI,
            step=0.001,
            value=_SNE_DEFAULT,
            label="Set Number Eq"
        )
        market_value = gr.Number(
            value=round(min(100.00, float(META["mv_max"])), 2),
            precision=2,
            label="Market Value (USD)"
        )

    proba_pretty = gr.Label(num_top_classes=2, label="Class probabilities (Yes/No)")

    # Order must match the parameter order of do_predict.
    inputs = [card_name, year, card_set, artwork_style, condition, set_number_eq, market_value]
    # Re-run the prediction whenever any input changes.
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=[proba_pretty])

    # Representative examples from the dataset if available, else a few hand-crafted ones
    examples = META["examples_rows"] if META["examples_rows"] else [
        ["Charizard", 1999, "Base Set", "Holo", "Near Mint", 0.85, 450.00],
        ["Pikachu", 2024, "Pokemon 151", "Full Art", "Near Mint", 1.05, 47.45],
        ["Ivysaur", 2025, "Pokemon 151", "Full Art", "Near Mint", 1.106, 30.77],
        ["Mew", 2024, "Pokemon 151", "Full Art Gold", "Mint", 1.242, 16.51],
        ["Spheal", 2014, "Evolutions", "Reverse Holo", "Lightly Played", 0.226, 0.12],
    ]

    gr.Examples(
        examples=examples,
        inputs=inputs,
        label="Representative examples (from the dataset or sensible defaults)",
        examples_per_page=min(5, len(examples)),
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()