Spaces:
Sleeping
Sleeping
| """ | |
| Function tools for the AI agent. | |
| Defines tools that the agent can use to query and analyze data. | |
| """ | |
| from typing import Dict, List, Any, Optional | |
| from database import DataManager | |
| from utils import setup_logger, log_function_call | |
| from config import MAX_RESULTS_TO_LLM | |
| logger = setup_logger(__name__) | |
| class AgentTools: | |
| """ | |
| Collection of tools available to the AI agent for data operations. | |
| """ | |
def __init__(self, data_manager: DataManager):
    """
    Binds this tool collection to the data backend it will query.
    Inputs: data_manager (DataManager) - stored as self.data_manager and
        used by every tool method
    Outputs: None
    """
    # Keep a reference to the backend; no data is loaded or copied here.
    self.data_manager = data_manager
    logger.info("AgentTools initialized")
def search_phones_by_criteria(
    self,
    brand: Optional[str] = None,
    min_price: Optional[float] = None,
    max_price: Optional[float] = None,
    ram_gb: Optional[int] = None,
    storage_gb: Optional[int] = None,
    os: Optional[str] = None,
    has_5g: Optional[bool] = None,
    min_rating: Optional[float] = None,
    limit: int = MAX_RESULTS_TO_LLM
) -> Dict[str, Any]:
    """
    Searches for phones matching specified criteria.
    Inputs: brand, min_price, max_price, ram_gb, storage_gb, os, has_5g, min_rating, limit
        (criteria left as None are not filtered on; a None limit falls back
        to MAX_RESULTS_TO_LLM)
    Outputs: dictionary with results and metadata, or a dictionary with
        success=False and an error message when limit is not positive or
        min_price is greater than max_price
    """
    parameters = {
        "brand": brand,
        "min_price": min_price,
        "max_price": max_price,
        "ram_gb": ram_gb,
        "storage_gb": storage_gb,
        "os": os,
        "has_5g": has_5g,
        "min_rating": min_rating,
        "limit": limit
    }
    log_function_call("search_phones_by_criteria", parameters)

    # Tool callers may send an explicit null for optional arguments; treat
    # that as "use the default" instead of failing on the comparison below.
    if limit is None:
        limit = MAX_RESULTS_TO_LLM
    if limit < 1:
        return {
            "success": False,
            "error": "limit must be a positive integer"
        }

    # An inverted range can never match; report it instead of silently
    # returning an empty result set.
    if min_price is not None and max_price is not None and min_price > max_price:
        return {
            "success": False,
            "error": "min_price cannot be greater than max_price"
        }

    # Build filters (only criteria that were actually supplied)
    filters: Dict[str, Any] = {}
    if brand:
        filters["brand"] = brand
    price_filter = {
        bound: value
        for bound, value in (("min", min_price), ("max", max_price))
        if value is not None
    }
    if price_filter:
        filters["price_usd"] = price_filter
    if ram_gb is not None:
        filters["ram_gb"] = ram_gb
    if storage_gb is not None:
        filters["storage_gb"] = storage_gb
    if os:
        filters["os"] = os
    if has_5g is not None:
        # The boolean is translated to the "Yes"/"No" strings used for the
        # 5g_support filter.
        filters["5g_support"] = "Yes" if has_5g else "No"
    if min_rating is not None:
        filters["rating"] = {"min": min_rating}

    # Get filtered results
    results = self.data_manager.filter_data(filters, limit=limit)
    return {
        "success": True,
        "count": len(results),
        "results": results,
        # A full page is reported as truncated; this is a conservative hint,
        # since an exact fit cannot be told apart from a cut-off result here.
        "truncated": len(results) >= limit
    }
def get_aggregated_statistics(
    self,
    group_by: str,
    metric: str = "price_usd",
    aggregation: str = "mean"
) -> Dict[str, Any]:
    """
    Computes one aggregate of a metric column per group.
    Inputs: group_by (column to group on, e.g., 'brand', 'os'), metric (column to aggregate, e.g., 'price_usd'), aggregation ('mean', 'sum', 'count', 'min', 'max')
    Outputs: dictionary echoing the request together with the aggregated results
    """
    # The request is logged and echoed back unchanged in the response.
    request = {
        "group_by": group_by,
        "metric": metric,
        "aggregation": aggregation
    }
    log_function_call("get_aggregated_statistics", request)

    # The actual grouping/aggregation is delegated to the data manager.
    aggregated = self.data_manager.aggregate_data(group_by, metric, aggregation)

    response: Dict[str, Any] = {"success": True, "count": len(aggregated)}
    response.update(request)
    response["results"] = aggregated
    return response
def get_price_analysis(
    self,
    brand: Optional[str] = None,
    category: Optional[str] = None
) -> Dict[str, Any]:
    """
    Analyzes price distribution, optionally restricted to one brand and
    optionally broken down by a category column.
    Inputs: brand (optional, exact match on the 'brand' column), category (optional column name, e.g., 'os')
    Outputs: dictionary with price analysis, or a dictionary with
        success=False and an error message when the price_usd column is
        missing or no priced rows match
    """
    parameters = {
        "brand": brand,
        "category": category
    }
    log_function_call("get_price_analysis", parameters)
    df = self.data_manager.get_dataframe()

    # Fail with a structured error rather than a KeyError when the dataset
    # has no price column.
    if 'price_usd' not in df.columns:
        return {
            "success": False,
            "error": "price_usd column not found"
        }

    # Filter by brand if specified
    if brand:
        df = df[df['brand'] == brand]

    # Rows without a price cannot contribute to any statistic; if nothing
    # is left, every statistic would be NaN, so report "no data" instead.
    prices = df['price_usd'].dropna()
    if prices.empty:
        return {
            "success": False,
            "error": "No data found for specified criteria"
        }

    lowest = float(prices.min())
    highest = float(prices.max())
    # The sample standard deviation is undefined (NaN) for a single value;
    # report 0.0 so the payload stays a valid, JSON-serializable number.
    spread = float(prices.std()) if len(prices) > 1 else 0.0

    analysis = {
        "success": True,
        "count": len(df),
        "avg_price": float(prices.mean()),
        "min_price": lowest,
        "max_price": highest,
        "median_price": float(prices.median()),
        "std_price": spread,
        "price_range": highest - lowest,
        # Percentiles
        "percentile_25": float(prices.quantile(0.25)),
        "percentile_75": float(prices.quantile(0.75))
    }

    # If category is specified, group by it
    if category:
        if category in df.columns:
            category_stats = df.groupby(category)['price_usd'].agg(['mean', 'count']).reset_index()
            analysis["by_category"] = category_stats.to_dict('records')
        else:
            # Unknown columns are skipped, but leave a trace for debugging.
            logger.info(f"Price analysis: category column '{category}' not found, breakdown skipped")

    logger.info(f"Price analysis completed: avg=${analysis['avg_price']:.2f}, range=${analysis['price_range']:.2f}")
    return analysis
def get_available_brands(self) -> Dict[str, Any]:
    """
    Lists every distinct brand present in the dataset, sorted ascending.
    Inputs: None
    Outputs: dictionary with the brand count and the sorted brand list
    """
    log_function_call("get_available_brands", {})

    # Distinct values of the "brand" column, as reported by the data manager.
    unique_brands = self.data_manager.get_unique_values("brand")

    response: Dict[str, Any] = {"success": True}
    response["count"] = len(unique_brands)
    response["brands"] = sorted(unique_brands)
    return response
def get_dataset_overview(self) -> Dict[str, Any]:
    """
    Wraps the data manager's summary statistics in a tool response.
    Inputs: None
    Outputs: dictionary with a success flag and the overview under "overview"
    """
    log_function_call("get_dataset_overview", {})
    # The summary itself is produced entirely by the data manager.
    summary = self.data_manager.get_summary_stats()
    return dict(success=True, overview=summary)
def get_top_expensive_phones(self, limit: int = 3) -> Dict[str, Any]:
    """Returns the top N most expensive phones sorted by price descending.
    Inputs: limit (int, default 3; None falls back to 3)
    Outputs: dict with list of phones and metadata, or a dict with
    success=False and an error message when limit is not positive or the
    price_usd column is missing"""
    log_function_call("get_top_expensive_phones", {"limit": limit})

    # Tool callers may send an explicit null; treat it as "use the default".
    if limit is None:
        limit = 3
    # DataFrame.head(n) with a negative n returns all rows EXCEPT the last
    # |n|, so a negative limit would dump almost the whole dataset.
    if limit < 1:
        return {"success": False, "error": "limit must be a positive integer"}

    df = self.data_manager.get_dataframe()
    if 'price_usd' not in df.columns:
        return {"success": False, "error": "price_usd column not found"}

    # Only project the display columns that actually exist, so a dataset
    # lacking e.g. 'rating' does not raise a KeyError.
    display_columns = ['brand', 'model', 'price_usd', 'ram_gb', 'storage_gb', 'rating']
    present_columns = [col for col in display_columns if col in df.columns]

    top_rows = df.sort_values(by='price_usd', ascending=False).head(limit)
    results = top_rows[present_columns].to_dict('records')
    return {
        "success": True,
        "count": len(results),
        "limit": limit,
        "results": results
    }
def get_tool_definitions(self) -> List[Dict]:
    """
    Returns OpenAI function definitions for all available tools.
    Inputs: None
    Outputs: list of tool definitions in OpenAI format, one entry per tool
        method of this class
    """
    return [
        {
            "type": "function",
            "function": {
                "name": "search_phones_by_criteria",
                "description": "Search for mobile phones matching specific criteria like brand, price range, RAM, storage, OS, 5G support, and rating. Returns up to 'limit' matching phones.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "brand": {
                            "type": "string",
                            "description": "Phone brand (e.g., 'Apple', 'Samsung', 'Xiaomi')"
                        },
                        "min_price": {
                            "type": "number",
                            "description": "Minimum price in USD"
                        },
                        "max_price": {
                            "type": "number",
                            "description": "Maximum price in USD"
                        },
                        "ram_gb": {
                            "type": "integer",
                            "description": "RAM size in GB"
                        },
                        "storage_gb": {
                            "type": "integer",
                            "description": "Storage size in GB"
                        },
                        "os": {
                            "type": "string",
                            "description": "Operating system (e.g., 'Android', 'iOS')"
                        },
                        "has_5g": {
                            "type": "boolean",
                            "description": "Whether the phone supports 5G"
                        },
                        "min_rating": {
                            "type": "number",
                            "description": "Minimum rating (0-5)"
                        },
                        "limit": {
                            "type": "integer",
                            # Advertise the same default the Python signature
                            # of search_phones_by_criteria uses, so the schema
                            # cannot drift from the configured value.
                            "description": f"Maximum number of results to return (default {MAX_RESULTS_TO_LLM})",
                            "default": MAX_RESULTS_TO_LLM
                        }
                    },
                    "required": []
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_aggregated_statistics",
                "description": "Get aggregated statistics by grouping data. For example, get average price by brand, count by OS, etc.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "group_by": {
                            "type": "string",
                            "description": "Column to group by (e.g., 'brand', 'os', 'processor', 'release_month')"
                        },
                        "metric": {
                            "type": "string",
                            "description": "Column to calculate metric on (e.g., 'price_usd', 'rating', 'battery_mah')",
                            "default": "price_usd"
                        },
                        "aggregation": {
                            "type": "string",
                            "enum": ["mean", "sum", "count", "min", "max"],
                            "description": "Type of aggregation to perform",
                            "default": "mean"
                        }
                    },
                    "required": ["group_by"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_price_analysis",
                "description": "Get detailed price analysis including average, min, max, median, standard deviation, and percentiles. Can be filtered by brand or grouped by category.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "brand": {
                            "type": "string",
                            "description": "Optional: Filter analysis to specific brand"
                        },
                        "category": {
                            "type": "string",
                            "description": "Optional: Group analysis by category (e.g., 'os', 'processor')"
                        }
                    },
                    "required": []
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_available_brands",
                "description": "Get a list of all available phone brands in the dataset.",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_dataset_overview",
                "description": "Get overview statistics about the dataset including total rows, columns, data types, and basic statistics.",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "get_top_expensive_phones",
                "description": "Return the top N most expensive phones sorted by price descending.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "limit": {
                            "type": "integer",
                            "description": "Number of phones to return (default 3)",
                            "default": 3
                        }
                    },
                    "required": []
                }
            }
        }
    ]