#!/usr/bin/env python3
"""
Create a private HuggingFace dataset and upload leaderboard data.
"""

import os
import sys

from huggingface_hub import HfApi, create_repo

# Target repository on the Hugging Face Hub.
REPO_ID = "OpenEvals/leaderboard-data"
REPO_TYPE = "dataset"

# Local source file (resolved against the current working directory) and the
# name it is stored under inside the dataset repository.
DATA_FILE = "data/leaderboard.json"
DATA_FILE_IN_REPO = "leaderboard.json"

# Dataset card uploaded as README.md. The leading "---" must be the very first
# characters of the file so the block is read as YAML front matter.
README_CONTENT = """---
license: mit
private: true
---

# Leaderboard Data

This is a private dataset containing benchmark leaderboard data.

## Files

- `leaderboard.json` - Main leaderboard data with model scores across 12 benchmarks

## Structure

The JSON file contains:
- `metadata`: Version, last updated timestamp, counts
- `benchmarks`: Benchmark definitions and metadata
- `models`: Array of model entries with scores

## Usage

This dataset is private and requires authentication to access.

```python
from huggingface_hub import hf_hub_download

file = hf_hub_download(
    repo_id="OpenEvals/leaderboard-data",
    filename="leaderboard.json",
    repo_type="dataset",
    token="your_token"
)
```

Or fetch directly via URL (requires auth token):
```
https://huggingface.co/datasets/OpenEvals/leaderboard-data/resolve/main/leaderboard.json
```
"""


def main():
    """Create the private dataset repo and upload the leaderboard and README.

    Reads the access token from the ``HF_TOKEN`` environment variable, then:

    1. creates the ``REPO_ID`` dataset repository as private (a no-op if it
       already exists, because ``exist_ok=True``),
    2. uploads ``DATA_FILE`` as ``leaderboard.json``,
    3. uploads the generated dataset card as ``README.md``.

    Progress is printed to stdout; error messages are printed to stderr.

    Raises:
        SystemExit: with status 1 when ``HF_TOKEN`` is unset or empty, when
            ``DATA_FILE`` does not exist, or when any Hub call fails.
    """
    # Get token from environment
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("❌ Error: HF_TOKEN environment variable not set", file=sys.stderr)
        print(
            "Please set it with: export HF_TOKEN=your_token_here",
            file=sys.stderr,
        )
        sys.exit(1)

    # Fail before touching the Hub: otherwise a missing file would only be
    # noticed after the repository had already been created.
    if not os.path.isfile(DATA_FILE):
        print(f"❌ Error: data file not found: {DATA_FILE}", file=sys.stderr)
        sys.exit(1)

    # Initialize API
    api = HfApi(token=token)

    try:
        # Create the repository (private)
        print(f"📦 Creating private dataset: {REPO_ID}")
        repo_url = create_repo(
            repo_id=REPO_ID,
            token=token,
            repo_type=REPO_TYPE,
            private=True,
            exist_ok=True,
        )
        print(f"✅ Repository created/exists: {repo_url}")

        # Upload the JSON file
        print("📤 Uploading leaderboard.json...")
        api.upload_file(
            path_or_fileobj=DATA_FILE,
            path_in_repo=DATA_FILE_IN_REPO,
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            token=token,
            commit_message="Update leaderboard data",
        )
        print("✅ File uploaded successfully!")

        # Create README (upload_file accepts raw bytes as well as a path)
        print("📝 Creating README.md...")
        api.upload_file(
            path_or_fileobj=README_CONTENT.encode(),
            path_in_repo="README.md",
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            token=token,
            commit_message="Add README",
        )
        print("✅ README created!")

        print("\n🎉 Success! Dataset is ready at:")
        print(f" https://huggingface.co/datasets/{REPO_ID}")
        print("\n📋 Data URL (requires auth):")
        print(
            f" https://huggingface.co/datasets/{REPO_ID}/resolve/main/leaderboard.json"
        )

    except Exception as e:
        # Top-level boundary of a CLI script: report any failure from the Hub
        # calls and exit non-zero instead of dumping a traceback.
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()