Spaces:
Running
Running
Commit
·
c2830c1
1
Parent(s):
dd7ca7d
Move the hf_api_token (gr.Header) argument to the end of each tool signature and give it a default of ""
Browse files
src/hf_eda_mcp/tools/analysis.py
CHANGED
|
@@ -32,10 +32,10 @@ MAX_UNIQUE_VALUES_TO_SHOW = 20
|
|
| 32 |
|
| 33 |
def analyze_dataset_features(
|
| 34 |
dataset_id: str,
|
| 35 |
-
hf_api_token: gr.Header,
|
| 36 |
split: str = "train",
|
| 37 |
sample_size: int = DEFAULT_ANALYSIS_SAMPLE_SIZE,
|
| 38 |
config_name: Optional[str] = None,
|
|
|
|
| 39 |
) -> Dict[str, Any]:
|
| 40 |
"""
|
| 41 |
Perform basic exploratory analysis on dataset features.
|
|
@@ -50,6 +50,7 @@ def analyze_dataset_features(
|
|
| 50 |
split: Dataset split to analyze (default: 'train')
|
| 51 |
sample_size: Number of samples to use for analysis (default: 1000, max: 50000)
|
| 52 |
config_name: Optional configuration name for multi-config datasets
|
|
|
|
| 53 |
|
| 54 |
Returns:
|
| 55 |
Dictionary containing comprehensive feature analysis:
|
|
|
|
| 32 |
|
| 33 |
def analyze_dataset_features(
|
| 34 |
dataset_id: str,
|
|
|
|
| 35 |
split: str = "train",
|
| 36 |
sample_size: int = DEFAULT_ANALYSIS_SAMPLE_SIZE,
|
| 37 |
config_name: Optional[str] = None,
|
| 38 |
+
hf_api_token: gr.Header = "",
|
| 39 |
) -> Dict[str, Any]:
|
| 40 |
"""
|
| 41 |
Perform basic exploratory analysis on dataset features.
|
|
|
|
| 50 |
split: Dataset split to analyze (default: 'train')
|
| 51 |
sample_size: Number of samples to use for analysis (default: 1000, max: 50000)
|
| 52 |
config_name: Optional configuration name for multi-config datasets
|
| 53 |
+
hf_api_token: Header parsed by Gradio when hf_api_token is provided in MCP configuration headers
|
| 54 |
|
| 55 |
Returns:
|
| 56 |
Dictionary containing comprehensive feature analysis:
|
src/hf_eda_mcp/tools/metadata.py
CHANGED
|
@@ -22,7 +22,7 @@ from hf_eda_mcp.error_handling import format_error_response, log_error_with_cont
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
|
| 25 |
-
def get_dataset_metadata(dataset_id: str, hf_api_token: gr.Header, config_name: Optional[str] = None) -> Dict[str, Any]:
|
| 26 |
"""
|
| 27 |
Retrieve comprehensive metadata for a HuggingFace dataset.
|
| 28 |
|
|
@@ -32,8 +32,8 @@ def get_dataset_metadata(dataset_id: str, hf_api_token: gr.Header, config_name:
|
|
| 32 |
|
| 33 |
Args:
|
| 34 |
dataset_id: HuggingFace dataset identifier (e.g., 'squad', 'glue', 'imdb')
|
| 35 |
-
hf_api_token: Header parsed by Gradio when hf_api_token is provided in MCP configuration headers
|
| 36 |
config_name: Optional configuration name for multi-config datasets
|
|
|
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
Dictionary containing comprehensive dataset metadata:
|
|
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
|
| 25 |
+
def get_dataset_metadata(dataset_id: str, config_name: Optional[str] = None, hf_api_token: gr.Header = "") -> Dict[str, Any]:
|
| 26 |
"""
|
| 27 |
Retrieve comprehensive metadata for a HuggingFace dataset.
|
| 28 |
|
|
|
|
| 32 |
|
| 33 |
Args:
|
| 34 |
dataset_id: HuggingFace dataset identifier (e.g., 'squad', 'glue', 'imdb')
|
|
|
|
| 35 |
config_name: Optional configuration name for multi-config datasets
|
| 36 |
+
hf_api_token: Header parsed by Gradio when hf_api_token is provided in MCP configuration headers
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
Dictionary containing comprehensive dataset metadata:
|
src/hf_eda_mcp/tools/sampling.py
CHANGED
|
@@ -30,11 +30,11 @@ VALID_SPLITS = {"train", "validation", "test", "dev", "val"}
|
|
| 30 |
|
| 31 |
def get_dataset_sample(
|
| 32 |
dataset_id: str,
|
| 33 |
-
hf_api_token: gr.Header,
|
| 34 |
split: str = "train",
|
| 35 |
num_samples: int = DEFAULT_SAMPLE_SIZE,
|
| 36 |
config_name: Optional[str] = None,
|
| 37 |
streaming: bool = True,
|
|
|
|
| 38 |
) -> Dict[str, Any]:
|
| 39 |
"""
|
| 40 |
Retrieve a sample of rows from a HuggingFace dataset.
|
|
@@ -49,6 +49,7 @@ def get_dataset_sample(
|
|
| 49 |
num_samples: Number of samples to retrieve (default: 10, max: 10000)
|
| 50 |
config_name: Optional configuration name for multi-config datasets
|
| 51 |
streaming: Whether to use streaming mode for efficient loading (default: True)
|
|
|
|
| 52 |
|
| 53 |
Returns:
|
| 54 |
Dictionary containing sampled data and metadata:
|
|
|
|
| 30 |
|
| 31 |
def get_dataset_sample(
|
| 32 |
dataset_id: str,
|
|
|
|
| 33 |
split: str = "train",
|
| 34 |
num_samples: int = DEFAULT_SAMPLE_SIZE,
|
| 35 |
config_name: Optional[str] = None,
|
| 36 |
streaming: bool = True,
|
| 37 |
+
hf_api_token: gr.Header = "",
|
| 38 |
) -> Dict[str, Any]:
|
| 39 |
"""
|
| 40 |
Retrieve a sample of rows from a HuggingFace dataset.
|
|
|
|
| 49 |
num_samples: Number of samples to retrieve (default: 10, max: 10000)
|
| 50 |
config_name: Optional configuration name for multi-config datasets
|
| 51 |
streaming: Whether to use streaming mode for efficient loading (default: True)
|
| 52 |
+
hf_api_token: Header parsed by Gradio when hf_api_token is provided in MCP configuration headers
|
| 53 |
|
| 54 |
Returns:
|
| 55 |
Dictionary containing sampled data and metadata:
|