PokeRoboAdvisor / src /tools.py
zypchn's picture
Update src/tools.py
dea58ed verified
import pandas as pd
from rapidfuzz import process, fuzz
import os
from pathlib import Path
class PokemonAdvisorTools():
"""
A central class to house all data retrieval and analysis tools
for the cAsh MCP Robo-Advisor.
"""
def __init__(self, data_dir: str = None):
"""
Initialize the tools with data loading.
Args:
data_dir: Directory containing the CSV files. If None, uses current directory.
"""
# Determine data directory
if data_dir is None:
data_dir = Path(__file__).parent # Same directory as tools.py
else:
data_dir = Path(data_dir)
# Construct file paths
kb_path = data_dir / "mcp_knowledge_base.csv"
kb_latest_path = data_dir / "mcp_knowledge_base_latest.csv"
# Load data with error handling
try:
print(f"Loading data from: {data_dir}")
print(f"Looking for: {kb_path}")
print(f"Looking for: {kb_latest_path}")
if not kb_path.exists():
raise FileNotFoundError(f"File not found: {kb_path}")
if not kb_latest_path.exists():
raise FileNotFoundError(f"File not found: {kb_latest_path}")
self.knowledge_base = pd.read_csv(kb_path)
self.knowledge_base_latest = pd.read_csv(kb_latest_path)
# Extract unique values
self.ALL_PROD_NAMES = self.knowledge_base['prod_name'].values
self.ALL_ARTIST_NAMES = self.knowledge_base['artist'].values
self.ALL_SET_NAMES = self.knowledge_base['set_name'].values
print(f"βœ“ Successfully loaded {len(self.knowledge_base)} records from knowledge_base")
print(f"βœ“ Successfully loaded {len(self.knowledge_base_latest)} records from knowledge_base_latest")
print(f"βœ“ Found {len(self.ALL_SET_NAMES)} unique sets")
except Exception as e:
print(f"❌ ERROR loading data: {e}")
print(f"Current working directory: {os.getcwd()}")
print(f"Files in data directory: {list(data_dir.glob('*.csv')) if data_dir.exists() else 'Directory not found'}")
# Initialize empty DataFrames as fallback
self.knowledge_base = pd.DataFrame()
self.knowledge_base_latest = pd.DataFrame()
self.ALL_PROD_NAMES = []
self.ALL_ARTIST_NAMES = []
self.ALL_SET_NAMES = []
raise RuntimeError(f"Failed to load Pokemon card data: {e} Current working directory: {os.getcwd()}")
def get_data_shape(self) -> int:
"""
Retrieves the number of set names.
Use this when user asks about the number of unique sets.
Returns:
int: length of knowledge base
"""
return len(self.ALL_SET_NAMES)
def list_card_names(self, name_query: str) -> list:
"""
Retrieves a list of card names from the database.
Use this tool when the user says the card you provided is not what they are looking for.
Args:
name_query (str): The name of the card to search for (e.g., "Umbreon GX").
The tool uses fuzzy matching, so exact spelling is not required.
Returns:
list: A list of 'prod_name's that matches the 'name_query'
"""
if len(self.ALL_PROD_NAMES) == 0:
return {"error": "Data not loaded."}
prod_names_match = process.extract(name_query, self.ALL_PROD_NAMES, scorer=fuzz.WRatio, limit=5)
return [name[0].replace("_", " ") for name in prod_names_match]
def get_card_info(self, name_query: str) -> dict:
"""
Retrieves comprehensive financial and metadata for a specific Pokemon card.
Use this tool when you need to know the current price, 6-month trend, or
general details of a card.
Args:
name_query (str): The name of the card to search for (e.g., "Charizard VMAX").
The tool uses fuzzy matching, so exact spelling is not required.
Returns:
dict: A dictionary containing 'used_price', 'graded_price', 'trend_6',
and other key metrics. Returns an 'error' key if not found.
"""
if len(self.ALL_PROD_NAMES) == 0:
return {"error": "Data not loaded."}
match = process.extractOne(name_query, self.ALL_PROD_NAMES, scorer=fuzz.WRatio)
if not match or match[1] < 70:
return {"error": f"Card '{name_query}' not found. Please check spelling."}
prod_name = match[0]
card_df = self.knowledge_base_latest[self.knowledge_base_latest["prod_name"] == prod_name]
if card_df.empty:
return {"error": f"Data missing for '{prod_name}'."}
return card_df.to_dict(orient="records")[0]
def find_grading_opportunities(self, max_price: float = 100, min_profit: float = 20) -> list:
"""
Scans the market for 'Arbitrage' opportunities where the gap between the Raw
and Graded price is largest.
Use this tool when the user asks for "buying recommendations," "profitable cards,"
or "what should I grade?".
Args:
max_price (float): The maximum price willing to pay for the raw card. Default is 100.
min_profit (float): The minimum profit (Graded Price - Raw Price - Fees) desired. Default is 20.
Returns:
list: A list of dictionaries representing the top 10 most profitable opportunities,
sorted by 'grade_profit' descending.
"""
profitable_grades = self.knowledge_base_latest[self.knowledge_base_latest["is_grade_profitable"] == True]
profitable_grades = profitable_grades[profitable_grades["used_price"] <= max_price]
min_profit_grades = profitable_grades[profitable_grades["grade_profit"] >= min_profit]
min_profit_grades = min_profit_grades.sort_values(
by="grade_profit", ascending=False
).head(10)
output_columns = [
"prod_name",
"used_price",
"graded_price",
"grade_profit",
"grade_profit_ratio",
"is_popular_pokemon",
"artist"
]
min_profit_grades = min_profit_grades[output_columns]
return min_profit_grades.to_dict(orient="records")
def get_market_movers(self, sort_by: str = "uptrend", interval: int = 6, market_type: str = "used") -> list:
"""
Identifies cards with the strongest positive or negative price trends over a sustained period (3 or 6 months).
Use this tool when users ask about "long-term growth," "steady winners," "market crashers,"
or "which cards are consistently losing value."
NOTE: Use this for TRENDS. Use `get_recent_price_spikes` for sudden, short-term JUMPS.
Args:
sort_by (str): "uptrend" to find biggest gainers, "downtrend" to find biggest losers. Default is "uptrend".
interval (int): The time period in months to analyze (3 or 6). Default is 6.
market_type (str): "used" (Raw) or "graded" (Slab). Default is "used".
Returns:
list: A list of the top 10 cards matching the trend criteria, including their percentage change.
"""
market_move_data = self.knowledge_base_latest.sort_values(
by=f"{market_type}_trend_{interval}",
ascending=(sort_by != "uptrend")
).head(10)
output_columns = ["prod_name", "used_price", "graded_price", f"{market_type}_trend_{interval}"]
market_move_data = market_move_data[output_columns]
return market_move_data.to_dict(orient="records")
def _calculate_risk_label(self, vol, low_threshold, high_threshold):
"""Helper function for volatility assessment tool."""
if vol < low_threshold:
return "🟒 Low Volatility (Stable/Blue Chip)"
elif vol > high_threshold:
return "πŸ”΄ High Volatility (Speculative)"
else:
return "🟑 Medium Volatility"
def assess_risk_volatility(self, card_name: str, interval: int = 6) -> dict:
"""
Calculates the risk profile of a card based on its price volatility over time.
ALWAYS use this tool before recommending an investment.
Args:
card_name (str): The name of the card to analyze.
interval (int): The time period in months to analyze (must be 3 or 6). Default is 6.
Returns:
dict: Contains 'volatility_assessment' (Low/Medium/High) and raw metrics.
"""
try:
interval = int(interval)
except ValueError:
return {"error": "Invalid 'interval' value. Must be 3 or 6."}
card_info = self.get_card_info(card_name)
if "error" in card_info:
return card_info
if interval not in [3, 6]:
return {"error": f"Invalid interval requested: {interval}. Only 3 or 6 months are supported."}
if interval == 3:
# 3-Month Thresholds
used_vol_low_threshold = 0.533
used_vol_high_threshold = 4.969
graded_vol_low_threshold = 0.982
graded_vol_high_threshold = 4.367
used_volatility = card_info.get("used_vol_3")
graded_volatility = card_info.get("graded_vol_3")
elif interval == 6:
# 6-Month Threshold
used_vol_low_threshold = 0.785
used_vol_high_threshold = 9.092
graded_vol_low_threshold = 2.250
graded_vol_high_threshold = 11.905
used_volatility = card_info.get("used_vol_6")
graded_volatility = card_info.get("graded_vol_6")
if used_volatility is None or graded_volatility is None:
return {"error": f"Volatility data missing for {card_name} at {interval} months."}
return {
"used_volatility": used_volatility,
"graded_volatility": graded_volatility,
f"used_volatility_assessment_{interval}_months": self._calculate_risk_label(used_volatility, used_vol_low_threshold, used_vol_high_threshold),
f"graded_volatility_assessment_{interval}_months": self._calculate_risk_label(graded_volatility, graded_vol_low_threshold, graded_vol_high_threshold),
}
def get_roi_metrics(self, card_name: str) -> dict:
"""
Retrieves the historical Return on Investment (ROI) percentages.
Use this tool to show how a card has performed in the past (e.g., "Is it going up?").
Args:
card_name (str): The name of the card.
Returns:
dict: Returns 3-month and 6-month ROI percentages for both Used and Graded conditions.
"""
card_info = self.get_card_info(card_name)
if "error" in card_info:
return card_info
return {
"used_price": card_info.get("used_price"),
"used_return_3_months": card_info.get("used_return_3"),
"used_return_6_months": card_info.get("used_return_6"),
"graded_return_3_months": card_info.get("graded_return_3"),
"graded_return_6_months": card_info.get("graded_return_6")
}
def get_recent_price_spikes(self, market_type: str = "used") -> list:
"""
Identifies cards that have recently experienced a significant price jump ("Spike").
Use this tool when users ask about "market movers," "hype," or "what is popping right now."
Args:
market_type (str): Either "used" (Raw) or "graded" (Slab). Default is "used".
Returns:
list: Top 20 cards with the highest recent positive price change.
"""
market_type = market_type.lower().strip()
if market_type == "used":
jump_data = self.knowledge_base_latest[self.knowledge_base_latest["used_jump_up"] == True]
jump_data = jump_data.sort_values("used_price", ascending=False).head(20)
output_columns = ["prod_name", "set_name", "used_price"]
return jump_data[output_columns].to_dict(orient="records")
elif market_type == "graded":
jump_data = self.knowledge_base_latest[self.knowledge_base_latest["graded_jump_up"] == True]
jump_data = jump_data.sort_values("graded_price", ascending=False).head(20)
output_columns = ["prod_name", "set_name", "graded_price"]
return jump_data[output_columns].to_dict(orient="records")
else:
return {"error": f"Invalid market_type '{market_type}'. Please use 'used' or 'graded'."}
def find_cards_by_artist(self, artist_name: str) -> dict:
"""
Finds profitable or popular cards illustrated by a specific artist.
Use this for "Niche" requests or when users ask about art styles.
Args:
artist_name (str): The artist's name.
Returns:
dict: A list of cards by that artist, sorted by profitability.
"""
if len(self.ALL_ARTIST_NAMES) == 0:
return {"error": "Data not loaded."}
artist_match = process.extractOne(artist_name, self.ALL_ARTIST_NAMES, scorer=fuzz.WRatio)
if not artist_match or artist_match[1] < 75:
return {"error": f"Artist '{artist_name}' not found or matched with low confidence."}
artist_name_match = artist_match[0]
artist_card_data = self.knowledge_base_latest[self.knowledge_base_latest["artist"] == artist_name_match]
profitable_cards = artist_card_data[artist_card_data["is_grade_profitable"] == True]
profitable_cards = profitable_cards.sort_values(by="grade_profit", ascending=False).head(20)
output_columns = [
"prod_name",
"set_name",
"used_price",
"grade_profit",
"grade_profit_ratio"
]
if profitable_cards.empty:
return {"result": f"No currently profitable cards found by artist {artist_name_match} in the latest data."}
return {
"artist": artist_name_match,
"cards": profitable_cards[output_columns].to_dict(orient="records")
}
def analyze_set_performance(self, set_name: str) -> dict:
"""
Aggregates data to analyze the overall health and sentiment of a specific Card Set.
Use this when users ask about broad trends like "How is Evolving Skies doing?"
rather than specific cards.
Args:
set_name (str): The name of the set (e.g., "Evolving Skies"). Fuzzy matched.
Returns:
dict: Average trends, average profitability, and the set's 'Chase Card'.
"""
if len(self.ALL_SET_NAMES) == 0:
return {"error": "Data not loaded."}
set_match = process.extractOne(set_name, self.ALL_SET_NAMES, scorer=fuzz.WRatio)
if not set_match or set_match[1] < 70:
return {"error": f"Set '{set_name}' not found. Available sets: {list(self.ALL_SET_NAMES[:5])}"}
set_name_match = set_match[0]
set_card_data = self.knowledge_base_latest[self.knowledge_base_latest["set_name"] == set_name_match]
if set_card_data.empty:
return {"error": f"No data found for set: {set_name_match}"}
total_cards = len(set_card_data)
avg_trend_6 = set_card_data["used_trend_6"].mean()
avg_grade_profit = set_card_data["grade_profit"].mean()
chase_card_row = set_card_data.sort_values('used_price', ascending=False).iloc[0]
return {
"set_name": set_name_match.replace("_", " "),
"total_cards_tracked": total_cards,
"market_sentiment_6mo": f"{avg_trend_6:.2f}%",
"avg_grading_profit": f"${avg_grade_profit:.2f}",
"chase_card": chase_card_row['prod_name'],
"chase_card_price": f"${chase_card_row['used_price']:.2f}"
}