Spaces:

OpenEvals
/

every-leaderboards

Running

File size: 2,971 Bytes

155702e

#!/usr/bin/env python3
"""
Create a private Hugging Face dataset repository and upload the leaderboard data and a README to it.
"""

import os
import sys
from huggingface_hub import HfApi, create_repo


def _build_readme(repo_id):
    """Return the README.md (dataset card) text, with examples pointing at *repo_id*."""
    return f"""---
license: mit
private: true
---

# Leaderboard Data

This is a private dataset containing benchmark leaderboard data.

## Files

- `leaderboard.json` - Main leaderboard data with model scores across 12 benchmarks

## Structure

The JSON file contains:
- `metadata`: Version, last updated timestamp, counts
- `benchmarks`: Benchmark definitions and metadata
- `models`: Array of model entries with scores

## Usage

This dataset is private and requires authentication to access.

```python
from huggingface_hub import hf_hub_download

file = hf_hub_download(
    repo_id="{repo_id}",
    filename="leaderboard.json",
    repo_type="dataset",
    token="your_token"
)
```

Or fetch directly via URL (requires auth token):
```
https://huggingface.co/datasets/{repo_id}/resolve/main/leaderboard.json
```
"""


def main() -> None:
    """Create the private dataset repository and upload the leaderboard files.

    Reads the access token from the ``HF_TOKEN`` environment variable and the
    data from ``data/leaderboard.json`` (relative to the current working
    directory), then creates the repository if needed and uploads
    ``leaderboard.json`` followed by a generated ``README.md``.

    Exits the process with status 1 when the token is not set, when the local
    data file is missing, or when any repository/upload call raises.
    """
    # Get token from environment
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("❌ Error: HF_TOKEN environment variable not set", file=sys.stderr)
        print("Please set it with: export HF_TOKEN=your_token_here", file=sys.stderr)
        sys.exit(1)

    # Repository details
    repo_id = "OpenEvals/leaderboard-data"
    repo_type = "dataset"
    data_path = "data/leaderboard.json"

    # Fail fast: verify the local file before touching the Hub, so a wrong
    # working directory does not leave a freshly created, empty repository.
    if not os.path.isfile(data_path):
        print(f"❌ Error: data file not found: {data_path}", file=sys.stderr)
        sys.exit(1)

    # Initialize API
    api = HfApi(token=token)

    try:
        # Create the repository (private); exist_ok makes re-runs succeed.
        print(f"📦 Creating private dataset: {repo_id}")
        repo_url = create_repo(
            repo_id=repo_id,
            token=token,
            repo_type=repo_type,
            private=True,
            exist_ok=True,
        )
        print(f"✅ Repository created/exists: {repo_url}")

        # Upload the JSON file
        print("📤 Uploading leaderboard.json...")
        api.upload_file(
            path_or_fileobj=data_path,
            path_in_repo="leaderboard.json",
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message="Update leaderboard data",
        )
        print("✅ File uploaded successfully!")

        # Upload the README from memory (bytes), no temporary file needed.
        print("📝 Creating README.md...")
        api.upload_file(
            path_or_fileobj=_build_readme(repo_id).encode(),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message="Add README",
        )
        print("✅ README created!")

        print("\n🎉 Success! Dataset is ready at:")
        print(f"   https://huggingface.co/datasets/{repo_id}")
        print("\n📋 Data URL (requires auth):")
        print(
            f"   https://huggingface.co/datasets/{repo_id}/resolve/main/leaderboard.json"
        )

    except Exception as exc:
        # Top-level boundary of the script: report any failure and exit non-zero.
        print(f"❌ Error: {exc}", file=sys.stderr)
        sys.exit(1)


# Run the upload only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()