Spaces:

Narsil
/

eval_playground

Sleeping

App Files Files Community

eval_playground / get_popular_eval_datasets.py

Narsil

Push.

e2152af unverified 5 months ago

raw

history blame

3.63 kB

	#!/usr/bin/env python3
	"""
	Script to fetch the 10 most used evaluation datasets from Hugging Face.
	"""

	import requests
	from typing import List, Dict

	# Substrings that mark a dataset as evaluation-related when they occur in
	# its id or in any of its tags.
	_EVAL_KEYWORDS = (
	    "evaluation", "benchmark", "eval", "test-set", "validation",
	    "leaderboard", "assessment", "metric",
	)

	# Names of well-known evaluation datasets, matched as substrings of the id.
	_KNOWN_EVAL_DATASETS = (
	    "glue", "superglue", "squad", "xnli", "hellaswag", "winogrande",
	    "arc", "mmlu", "gsm8k", "humaneval", "mbpp", "truthfulqa",
	    "bigbench", "c4", "piqa", "siqa", "boolq", "copa", "multirc",
	    "record", "rte", "wic", "wsc", "cb", "axb", "axg", "swag",
	    "race", "qnli", "wnli", "sst", "cola", "stsb", "mrpc", "qqp",
	)

	# Upper bound, in seconds, on how long the HTTP request may block.
	_REQUEST_TIMEOUT_SECONDS = 30


	def _is_eval_dataset(dataset_id: str, tags: List) -> bool:
	    """Return True if the lowercased id or any tag contains an eval marker."""
	    if any(marker in dataset_id for marker in _EVAL_KEYWORDS + _KNOWN_EVAL_DATASETS):
	        return True
	    return any(
	        keyword in str(tag).lower()
	        for tag in tags
	        for keyword in _EVAL_KEYWORDS
	    )


	def get_popular_eval_datasets(limit: int = 10) -> List[Dict]:
	    """
	    Fetch popular evaluation datasets from Hugging Face Hub API.

	    Requests the 100 most-downloaded datasets, keeps those whose id or tags
	    contain an evaluation keyword (or whose id contains a well-known
	    evaluation dataset name), and returns the most-downloaded matches.
	    Matching is by substring, so it is deliberately permissive. Because only
	    100 candidates are requested, at most 100 entries can ever be returned.

	    Args:
	        limit: Number of datasets to return. Non-positive values yield an
	            empty list without contacting the API.

	    Returns:
	        List of dataset information dictionaries with the keys "name",
	        "downloads", "likes", "tags" (first 5 string tags) and
	        "description" (first 200 characters), sorted by downloads in
	        descending order.

	    Raises:
	        requests.exceptions.RequestException: If the request fails, times
	            out, or the server answers with an HTTP error status.
	    """
	    if limit <= 0:
	        # Nothing to return; skip the network round-trip entirely.
	        return []

	    response = requests.get(
	        "https://huggingface.co/api/datasets",
	        params={
	            "sort": "downloads",  # Sort by most downloaded
	            "direction": "-1",  # Descending order
	            "limit": 100,  # Get more than needed so there is room to filter
	            "full": "true",
	        },
	        # Without a timeout the call could block forever on a stalled server.
	        timeout=_REQUEST_TIMEOUT_SECONDS,
	    )
	    response.raise_for_status()

	    eval_datasets = []
	    for dataset in response.json():
	        # `or` fallbacks guard against keys that are present but null, which
	        # a plain .get(key, default) would pass through as None.
	        tags = dataset.get("tags") or []
	        name = dataset.get("id") or ""
	        if not _is_eval_dataset(name.lower(), tags):
	            continue
	        eval_datasets.append({
	            "name": name,
	            "downloads": dataset.get("downloads") or 0,
	            "likes": dataset.get("likes") or 0,
	            "tags": [tag for tag in tags if isinstance(tag, str)][:5],  # First 5 tags
	            "description": (dataset.get("description") or "")[:200],  # First 200 chars
	        })

	    # Sort by downloads and return top N
	    eval_datasets.sort(key=lambda item: item["downloads"], reverse=True)
	    return eval_datasets[:limit]

	def main():
	    """
	    Print a ranked report of the top 10 evaluation datasets.

	    Each entry shows its name, download count and likes, plus its tags and
	    description when those are non-empty. Any failure is reported as a
	    printed message instead of propagating to the caller.
	    """
	    print("Fetching the 10 most used evaluation datasets from Hugging Face...\n")

	    try:
	        top_datasets = get_popular_eval_datasets(10)

	        for rank, entry in enumerate(top_datasets, start=1):
	            print(f"{rank}. {entry['name']}")
	            print(f" Downloads: {entry['downloads']:,}")
	            print(f" Likes: {entry['likes']}")
	            # Tags and description are optional; skip them when empty.
	            if entry['tags']:
	                print(f" Tags: {', '.join(entry['tags'])}")
	            if entry['description']:
	                print(f" Description: {entry['description']}...")
	            # Blank line separates consecutive entries.
	            print()

	    except requests.exceptions.RequestException as err:
	        # Network-level or HTTP-status failure while talking to the Hub.
	        print(f"Error fetching data from Hugging Face: {err}")
	    except Exception as err:
	        # Last-resort catch so the script ends with a message, not a traceback.
	        print(f"An error occurred: {err}")

	# Run the report only when this file is executed as a script, so importing
	# the module has no side effects.
	if __name__ == "__main__":
	    main()