| from .supported_datasets import ( |
| supported_datasets, |
| internal_datasets, |
| possible_with_vector_reps, |
| standard_data_benchmark, |
| testing, |
| ) |
|
|
|
|
def list_supported_datasets(with_descriptions=True):
    """
    Print every supported dataset to stdout, optionally as a described table.

    When descriptions are requested and the optional ``dataset_descriptions``
    module can be imported, a table with Dataset / Type / Task / Description
    columns is printed, followed by the standard benchmark dataset names.
    Otherwise a plain ``- name: value`` listing of ``supported_datasets`` is
    printed and the function returns early.

    Args:
        with_descriptions (bool): Whether to include descriptions (if available)

    Returns:
        None. All output goes to stdout.
    """
    # The descriptions module is optional; fall back to the plain listing
    # when it cannot be imported.
    try:
        from .dataset_descriptions import dataset_descriptions
    except ImportError:
        dataset_descriptions = None

    print("\n=== Supported Datasets ===\n")

    if not with_descriptions or dataset_descriptions is None:
        for dataset_name, entry in supported_datasets.items():
            print(f"- {dataset_name}: {entry}")
        return

    # Build each row exactly once so that the width computation and the
    # printing below are guaranteed to agree on the cell contents.
    rows = []
    for dataset_name in supported_datasets:
        info = dataset_descriptions.get(dataset_name, {})
        rows.append((
            dataset_name,
            # str() keeps len() and the alignment spec working even if a
            # description entry stores a non-string value.
            str(info.get('type', 'Unknown')),
            str(info.get('task', 'Unknown')),
            info.get('description', 'No description available'),
        ))

    # Column width = longest cell in that column (header included) + 2 of
    # padding. Including the header keeps max() from ever seeing an empty
    # sequence and stops short columns from running into their neighbours.
    headers = ('Dataset', 'Type', 'Task')
    widths = [
        max(len(cell) for cell in column) + 2
        for column in zip(headers, *(row[:3] for row in rows))
    ]
    name_width, type_width, task_width = widths

    print(f"{'Dataset':<{name_width}}{'Type':<{type_width}}{'Task':<{task_width}}Description")
    # Same rule length as before: sum of the unpadded widths plus 50.
    print("-" * (sum(widths) - 2 * len(widths) + 50))

    for dataset_name, dataset_type, dataset_task, description in rows:
        print(f"{dataset_name:<{name_width}}{dataset_type:<{type_width}}{dataset_task:<{task_width}}{description}")

    print("\n=== Standard Benchmark Datasets ===\n")
    for dataset_name in standard_data_benchmark:
        print(f"- {dataset_name}")
|
|
|
|
def get_dataset_info(dataset_name):
    """
    Look up what is known about a single dataset.

    An entry in the optional ``dataset_descriptions`` module takes
    precedence. If there is none (or the module cannot be imported), a
    minimal record is built from ``supported_datasets`` instead.

    Args:
        dataset_name (str): Name of the dataset

    Returns:
        dict: The description entry, or ``{"name": ..., "source": ...}``
        built from ``supported_datasets``; None if the dataset is unknown
    """
    # The descriptions module is optional; treat a failed import as
    # "no descriptions available".
    try:
        from .dataset_descriptions import dataset_descriptions as catalogue
    except ImportError:
        catalogue = {}

    if dataset_name in catalogue:
        return catalogue[dataset_name]

    if dataset_name not in supported_datasets:
        return None

    return {"name": dataset_name, "source": supported_datasets[dataset_name]}
|
|
|
|
if __name__ == "__main__":
    import argparse
    import sys

    # Command-line front end: --list prints every dataset, --info NAME
    # prints the details of one dataset.
    cli = argparse.ArgumentParser(description='List and describe supported datasets')
    cli.add_argument('--list', action='store_true', help='List all supported datasets')
    cli.add_argument('--info', type=str, help='Get information about a specific dataset')
    args = cli.parse_args()

    # Invoked without any arguments: show usage and exit with status 1.
    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    if args.list:
        list_supported_datasets()

    if args.info:
        details = get_dataset_info(args.info)
        if not details:
            print(f"Dataset '{args.info}' not found in supported datasets.")
        else:
            print(f"\n=== Dataset: {args.info} ===\n")
            for field, content in details.items():
                print(f"{field.capitalize()}: {content}")