Spaces:

OffWorldTensor
/

PokePrice

Runtime error

App Files Files Community

OffWorldTensor committed on Aug 31, 2025

Commit

5256800

1 Parent(s): 074528e

feat: Refine Gradio UI and improve model card

Browse files

Files changed (7) hide show

.gitattributes +3 -0
.idea/workspace.xml +8 -0
README.md +10 -3
__pycache__/network.cpython-312.pyc +0 -0
app.py +52 -36
explain_model.py +202 -0
requirements.txt +2 -1

.gitattributes CHANGED Viewed

@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text
+model/model.safetensors filter=lfs diff=lfs merge=lfs -text
+data/pokemon_final_with_labels.csv filter=lfs diff=lfs merge=lfs -text
+data/scaler.pkl filter=lfs diff=lfs merge=lfs -text

.idea/workspace.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PropertiesComponent">{
+  &quot;keyToString&quot;: {
+    &quot;settings.editor.selected.configurable&quot;: &quot;com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable&quot;
+  }
+}</component>
+</project>

README.md CHANGED Viewed

@@ -10,9 +10,16 @@ pinned: false
 license: mit
 tags:
   - pytorch
   - machine-learning
-  - pokemon
   - price-prediction
 ---
 ## PokePrice: Pokémon Card Price Trend Predictor
@@ -20,10 +27,10 @@ tags:
 This application uses a PyTorch-based neural network to predict whether the market price of a specific Pokémon card will rise by 30% or more over the next six months.
 ### How It Works
-1.  **Select a Card:** Choose a Pokémon card from the dropdown menu. The list is populated from a dataset containing historical price information.
 2.  **Get Prediction:** The model analyzes various features of the selected card, such as its rarity, type, and historical price data, to make a prediction.
 3.  **View Results:** The application displays:
-    *   The prediction (whether the price is expected to **RISE** or **NOT RISE**).
     *   The model's confidence level in the prediction.
     *   A direct link to view the card on TCGPlayer.com.
     *   The actual historical outcome if it exists in the dataset, for comparison.

 license: mit
 tags:
   - pytorch
+  - scikit-learn
+  - gradio
   - machine-learning
+  - tabular-classification
   - price-prediction
+  - finance
+  - pokemon
+  - pokemon-cards
+  - tcg
+  - collectibles
 ---
 ## PokePrice: Pokémon Card Price Trend Predictor
 This application uses a PyTorch-based neural network to predict whether the market price of a specific Pokémon card will rise by 30% or more over the next six months.
 ### How It Works
+1.  **Enter a Card ID:** Input the numeric TCGPlayer ID for a specific Pokémon card. You can find this ID in the URL of the card's page on the TCGPlayer website (e.g., `tcgplayer.com/product/84198/...`).
 2.  **Get Prediction:** The model analyzes various features of the selected card, such as its rarity, type, and historical price data, to make a prediction.
 3.  **View Results:** The application displays:
+    *   The card's name and the prediction (whether the price is expected to **RISE** or **NOT RISE**).
     *   The model's confidence level in the prediction.
     *   A direct link to view the card on TCGPlayer.com.
     *   The actual historical outcome if it exists in the dataset, for comparison.

__pycache__/network.cpython-312.pyc ADDED Viewed

Binary file (1.53 kB). View file

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import joblib
 import pandas as pd
 import os
 import json
-import re
 from safetensors.torch import load_file
 from typing import List, Tuple
 from network import PricePredictor
@@ -16,6 +15,7 @@ DATA_PATH = os.path.join(DATA_DIR, "pokemon_final_with_labels.csv")
 TARGET_COLUMN = 'price_will_rise_30_in_6m'
 def load_model_and_config(model_dir: str) -> Tuple[torch.nn.Module, List[str]]:
     config_path = os.path.join(model_dir, "config.json")
     with open(config_path, "r") as f:
@@ -40,51 +40,52 @@ def perform_prediction(model: torch.nn.Module, scaler, input_features: pd.Series
     return predicted_class, probability
 try:
     model, feature_columns = load_model_and_config(MODEL_DIR)
     scaler = joblib.load(SCALER_PATH)
     full_data = pd.read_csv(DATA_PATH)
-    full_data['display_name'] = full_data.apply(
-        lambda row: f"{row['name']} (ID: {row['tcgplayer_id']})", axis=1
-    )
-    card_choices = sorted(full_data['display_name'].unique().tolist())
     ASSETS_LOADED = True
 except FileNotFoundError as e:
     print(f"Error loading necessary files: {e}")
     print("Please make sure you have uploaded the 'model' and 'data' directories to your Hugging Face Space.")
-    card_choices = ["Error: Model or data files not found. Check logs."]
     ASSETS_LOADED = False
-def predict_price_trend(card_display_name: str) -> str:
     if not ASSETS_LOADED:
         return "## Application Error\nAssets could not be loaded. Please check the logs on Hugging Face Spaces for details. You may need to upload your `model` and `data` directories."
-    try:
-        tcgplayer_id = int(re.search(r'\(ID: (\d+)\)', card_display_name).group(1))
-    except (AttributeError, ValueError):
-        return f"## Input Error\nCould not parse ID from '{card_display_name}'. Please select a valid card from the dropdown."
-    card_data = full_data[full_data['tcgplayer_id'] == tcgplayer_id]
     if card_data.empty:
-        return f"## Internal Error\nCould not find data for ID {tcgplayer_id}. Please restart the Space or select another card."
     card_sample = card_data.iloc[0]
     sample_features = card_sample[feature_columns]
     predicted_class, probability = perform_prediction(model, scaler, sample_features)
     prediction_text = "**RISE**" if predicted_class else "**NOT RISE**"
     confidence = probability if predicted_class else 1 - probability
-    # Construct the TCGPlayer link
     tcgplayer_link = f"https://www.tcgplayer.com/product/{tcgplayer_id}?Language=English"
     true_label_text = ""
-    if TARGET_COLUMN in card_sample and pd.notna(card_sample[TARGET_COLUMN]):
-        true_label = bool(card_sample[TARGET_COLUMN])
-        true_label_text = f"\n- **Actual Result in Dataset:** The price did **{'RISE' if true_label else 'NOT RISE'}**."
     output = f"""
     ## 🔮 Prediction Report for {card_sample['name']}
@@ -96,22 +97,37 @@ def predict_price_trend(card_display_name: str) -> str:
     return output
-iface = gr.Interface(
-    fn=predict_price_trend,
-    inputs=gr.Dropdown(
-        choices=card_choices,
-        label="Select a Pokémon Card",
-        info="Choose a card from the dataset to predict its price trend."
-    ),
-    outputs=gr.Markdown(),
-    title="PricePoke: Pokémon Card Price Trend Predictor",
-    description="""
-    Select a Pokémon card to predict whether its market price will increase by 30% or more over the next 6 months.
-    This model was trained on historical TCGPlayer market data.
-    """,
-    examples=[[card_choices[0]] if card_choices and ASSETS_LOADED else []],
-    allow_flagging="never"
-)
 if __name__ == "__main__":
-    iface.launch()

 import pandas as pd
 import os
 import json
 from safetensors.torch import load_file
 from typing import List, Tuple
 from network import PricePredictor
 TARGET_COLUMN = 'price_will_rise_30_in_6m'
 def load_model_and_config(model_dir: str) -> Tuple[torch.nn.Module, List[str]]:
     config_path = os.path.join(model_dir, "config.json")
     with open(config_path, "r") as f:
     return predicted_class, probability
+# --- Asset Loading ---
 try:
     model, feature_columns = load_model_and_config(MODEL_DIR)
     scaler = joblib.load(SCALER_PATH)
     full_data = pd.read_csv(DATA_PATH)
     ASSETS_LOADED = True
 except FileNotFoundError as e:
     print(f"Error loading necessary files: {e}")
     print("Please make sure you have uploaded the 'model' and 'data' directories to your Hugging Face Space.")
     ASSETS_LOADED = False
+def predict_price_trend(card_identifier: str) -> str:
     if not ASSETS_LOADED:
         return "## Application Error\nAssets could not be loaded. Please check the logs on Hugging Face Spaces for details. You may need to upload your `model` and `data` directories."
+    if not card_identifier or not card_identifier.strip().isdigit():
+        return "## Input Error\nPlease enter a valid, numeric TCGPlayer ID."
+    # --- Find Card Logic ---
+    card_id = int(card_identifier.strip())
+    card_data = full_data[full_data['tcgplayer_id'] == card_id]
     if card_data.empty:
+        return f"## Card Not Found\nCould not find a card with TCGPlayer ID '{card_id}'. Please check the ID and try again."
+    # tcgplayer_id is expected to be unique per card; if duplicates exist, the first matching row is used.
     card_sample = card_data.iloc[0]
     sample_features = card_sample[feature_columns]
+    # --- Prediction Logic ---
     predicted_class, probability = perform_prediction(model, scaler, sample_features)
     prediction_text = "**RISE**" if predicted_class else "**NOT RISE**"
     confidence = probability if predicted_class else 1 - probability
+    tcgplayer_id = card_sample['tcgplayer_id']
     tcgplayer_link = f"https://www.tcgplayer.com/product/{tcgplayer_id}?Language=English"
+    # --- Output Formatting ---
     true_label_text = ""
+    try:
+        if TARGET_COLUMN in card_sample and pd.notna(card_sample[TARGET_COLUMN]):
+            true_label = bool(card_sample[TARGET_COLUMN])
+            true_label_text = f"\n- **Actual Result in Dataset:** The price did **{'RISE' if true_label else 'NOT RISE'}**."
+    except (KeyError, TypeError):
+        pass # If target column is missing or value is invalid, just skip this part.
     output = f"""
     ## 🔮 Prediction Report for {card_sample['name']}
     return output
+# --- Gradio UI ---
+with gr.Blocks(theme=gr.themes.Soft(), title="PricePoke Predictor") as demo:
+    gr.Markdown(
+        """
+        # 📈 PricePoke: Pokémon Card Price Trend Predictor
+        Enter a Pokémon card's TCGPlayer ID to predict whether its market price will increase by 30% or more over the next 6 months.
+        This model was trained on historical TCGPlayer market data.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            card_input = gr.Textbox(
+                label="TCGPlayer ID",
+                placeholder="e.g., '84198'",
+                info="Find the ID in the card's URL on TCGPlayer's website (e.g., tcgplayer.com/product/84198/... has ID 84198)."
+            )
+            predict_button = gr.Button("Predict Trend", variant="primary")
+            gr.Markdown("---")
+            gr.Markdown("### Example Cards")
+            if ASSETS_LOADED:
+                example_df = full_data.sample(5, random_state=42)[['name', 'tcgplayer_id']]
+                gr.Markdown(example_df.to_markdown(index=False))
+            else:
+                gr.Markdown("Could not load examples.")
+        with gr.Column(scale=2):
+            output_markdown = gr.Markdown()
+    predict_button.click(fn=predict_price_trend, inputs=[card_input], outputs=[output_markdown])
+    card_input.submit(fn=predict_price_trend, inputs=[card_input], outputs=[output_markdown])
 if __name__ == "__main__":
+    demo.launch()

explain_model.py ADDED Viewed

	@@ -0,0 +1,202 @@

+#!/usr/bin/env python3
+# explain_model.py
+import os
+import json
+import numpy as np
+import pandas as pd
+import torch
+import joblib
+import shap
+import matplotlib.pyplot as plt
+from safetensors.torch import load_file
+from network import PricePredictor
+# --- 0. Config ---
+MODEL_DIR = "model"
+DATA_DIR = "data"
+SCALER_PATH = os.path.join(DATA_DIR, "scaler.pkl")
+DATA_PATH = os.path.join(DATA_DIR, "pokemon_final_with_labels.csv")
+CONFIG_PATH = os.path.join(MODEL_DIR, "config.json")
+TARGET_COLUMN = "price_will_rise_30_in_6m"
+# --- 1. Load model & assets ---
+with open(CONFIG_PATH, "r") as f:
+    config = json.load(f)
+feature_columns = config["feature_columns"]
+input_size = config["input_size"]
+model = PricePredictor(input_size=input_size)
+model.load_state_dict(load_file(os.path.join(MODEL_DIR, "model.safetensors")))
+model.eval()
+scaler = joblib.load(SCALER_PATH)
+full_data = pd.read_csv(DATA_PATH)
+# Sanity checks
+missing_cols = [c for c in feature_columns if c not in full_data.columns]
+if missing_cols:
+    raise ValueError(f"Missing required feature columns in CSV: {missing_cols}")
+features_df = full_data[feature_columns]
+if features_df.shape[1] != input_size:
+    raise ValueError(
+        f"Config input_size={input_size}, but CSV provides {features_df.shape[1]} features. "
+        f"Ensure config['feature_columns'] matches the trained model."
+    )
+# --- 2. Prepare Data for SHAP ---
+bg_n = min(100, len(features_df))
+explain_n = min(10, len(features_df))
+background_idx = features_df.sample(n=bg_n, random_state=42).index
+explain_idx = features_df.sample(n=explain_n, random_state=1).index
+background_data = features_df.loc[background_idx]
+explain_instances = features_df.loc[explain_idx]
+# Use arrays for scaler to avoid feature-name warnings
+background_data_scaled = scaler.transform(background_data.values)
+explain_instances_scaled = scaler.transform(explain_instances.values)
+background_tensor = torch.tensor(background_data_scaled, dtype=torch.float32)  # no grad
+explain_tensor = torch.tensor(explain_instances_scaled, dtype=torch.float32, requires_grad=True)
+# --- Helpers ---
+def get_shap_explanations(model, background_tensor, explain_tensor):
+    """Try DeepExplainer then fall back to GradientExplainer. Return (explanation, explainer_used_name)."""
+    try:
+        print("Initializing SHAP DeepExplainer...")
+        explainer = shap.DeepExplainer(model, background_tensor)
+        print("Calculating SHAP values for the sample...")
+        exp = explainer(explain_tensor)
+        setattr(exp, "_expected_value_hint", getattr(explainer, "expected_value", None))
+        return exp, "deep"
+    except Exception as e:
+        print(f"[DeepExplainer failed: {e}] Falling back to GradientExplainer...")
+        explain_tensor.requires_grad_(True)
+        grad_explainer = shap.GradientExplainer(model, background_tensor)
+        exp = grad_explainer(explain_tensor)
+        setattr(exp, "_expected_value_hint", getattr(grad_explainer, "expected_value", None))
+        return exp, "grad"
+def compute_base_value_safe(shap_explanation, instance_idx, model, background_tensor):
+    """Return scalar base value robustly across SHAP versions."""
+    bv = getattr(shap_explanation, "base_values", None)
+    if bv is not None:
+        try:
+            return float(np.squeeze(bv[instance_idx]))
+        except Exception:
+            try:
+                return float(np.squeeze(bv))
+            except Exception:
+                pass
+    ev = getattr(shap_explanation, "_expected_value_hint", None)
+    if ev is not None:
+        try:
+            return float(np.squeeze(ev))
+        except Exception:
+            try:
+                return float(np.mean(ev))
+            except Exception:
+                pass
+    with torch.no_grad():
+        mu = background_tensor.mean(dim=0, keepdim=True)
+        out = model(mu).detach().cpu().squeeze()
+        return float(out.mean().item()) if out.numel() > 1 else float(out.item())
+def stack_sample_shap_values(exp, n_features_expected):
+    """
+    Some SHAP versions return exp.values with shape (n_samples, 1) or other oddities.
+    However, exp[i].values is typically the correct 1D (n_features,) vector.
+    We rebuild a full matrix by stacking per-sample slices.
+    """
+    rows = []
+    n_samples = len(exp.values) if hasattr(exp.values, "__len__") else len(exp)
+    # Safer: iterate using the __getitem__ API
+    for i in range(n_samples):
+        v = np.asarray(exp[i].values).reshape(-1,)
+        rows.append(v)
+    M = np.vstack(rows)  # (n_samples, n_features)
+    if M.shape[1] != n_features_expected:
+        raise RuntimeError(
+            f"Rebuilt SHAP matrix has shape {M.shape}; expected n_features={n_features_expected}."
+        )
+    return M
+# --- 3. Compute SHAP explanations ---
+shap_explanation, _ = get_shap_explanations(model, background_tensor, explain_tensor)
+print("Calculation complete.")
+# Attach unscaled display data for pretty plotting
+shap_explanation.display_data = explain_instances.values
+shap_explanation.feature_names = feature_columns
+# --- 4a. Global Feature Importance (Bar / Summary) ---
+print("\nGenerating global feature importance plot (summary_plot.png)...")
+# Robustly build a (n_samples, n_features) matrix by stacking per-sample vectors
+shap_vals_matrix = stack_sample_shap_values(shap_explanation, n_features_expected=len(feature_columns))
+mean_abs_shap = np.abs(shap_vals_matrix).mean(axis=0)  # (n_features,)
+# Build a fresh Explanation with values aligned to feature_names
+plot_explanation = shap.Explanation(values=mean_abs_shap, feature_names=feature_columns)
+plt.figure()
+shap.plots.bar(plot_explanation, show=False)
+plt.xlabel("mean(|SHAP value|) (average impact on model output magnitude)")
+plt.savefig("summary_plot.png", bbox_inches="tight")
+plt.close()
+print("Saved: summary_plot.png")
+# --- 4b. Local Explanation (Force Plot) ---
+print("\nGenerating local explanation for one card (force_plot.html)...")
+instance_to_explain_index = 0
+single_explanation = shap_explanation[instance_to_explain_index]
+# Some SHAP versions drop display_data on slicing; pull directly if needed
+if getattr(single_explanation, "display_data", None) is None:
+    row_unscaled = explain_instances.values[instance_to_explain_index]
+else:
+    row_unscaled = single_explanation.display_data
+features_row = np.atleast_2d(np.asarray(row_unscaled, dtype=float))
+base_val = compute_base_value_safe(shap_explanation, instance_to_explain_index, model, background_tensor)
+phi = np.asarray(single_explanation.values).reshape(-1,)  # (n_features,)
+force_plot = shap.force_plot(
+    base_val,
+    phi,
+    features=features_row,
+    feature_names=feature_columns
+)
+shap.save_html("force_plot.html", force_plot)
+print("Saved: force_plot.html (open in a browser)")
+# --- 4c. Optional: local waterfall PNG (often clearer) ---
+try:
+    print("Generating local waterfall plot (waterfall_single.png)...")
+    plt.figure()
+    shap.plots.waterfall(single_explanation, show=False, max_display=20)
+    plt.savefig("waterfall_single.png", bbox_inches="tight")
+    plt.close()
+    print("Saved: waterfall_single.png")
+except Exception as e:
+    print(f"Waterfall plot skipped (reason: {e})")
+# --- 5. Print metadata for the explained card ---
+original_card_data = full_data.loc[explain_idx[instance_to_explain_index]]
+name_val = original_card_data.get("name", "N/A")
+tcgp_val = original_card_data.get("tcgplayer_id", "N/A")
+label_val = original_card_data.get(TARGET_COLUMN, None)
+label_str = "RISE" if bool(label_val) else "NOT RISE" if label_val is not None else "N/A"
+print("\n--- Card Explained in force_plot.html / waterfall_single.png ---")
+print(f"Name: {name_val}")
+print(f"TCGPlayer ID: {tcgp_val}")
+print(f"Actual Outcome in Dataset: {label_str}")
+# TODO: Convert the model into a format that can be shared on Hugging Face as a model that others can pull down and use.
+# TODO: Include the SHAP charts (force_plot.html and summary_plot.png) that explain the model, and compute additional evaluation metrics to document in the model card.

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ pandas
 numpy
 scikit-learn
 safetensors
-gradio

 numpy
 scikit-learn
 safetensors
+gradio
+tabulate