File size: 2,971 Bytes
155702e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
"""
Create a private HuggingFace dataset and upload leaderboard data.
"""

import os
import sys
from huggingface_hub import HfApi, create_repo


def main():
    """Create the private leaderboard dataset on the Hub and upload its files.

    Reads the access token from the ``HF_TOKEN`` environment variable, then:

    1. creates the dataset repository ``OpenEvals/leaderboard-data`` as
       private (an already existing repository is accepted),
    2. uploads the local file ``data/leaderboard.json`` as
       ``leaderboard.json``,
    3. uploads a generated ``README.md``.

    Progress is printed to stdout. The process exits with status 1 when the
    token is missing, when the local data file does not exist, or when any
    Hub operation raises.
    """
    # Get token from environment
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("❌ Error: HF_TOKEN environment variable not set")
        print("Please set it with: export HF_TOKEN=your_token_here")
        sys.exit(1)

    # Repository details
    repo_id = "OpenEvals/leaderboard-data"
    repo_type = "dataset"
    data_path = "data/leaderboard.json"

    # Fail fast before touching the Hub: without this check a missing data
    # file would only be noticed after the remote repository was created.
    if not os.path.isfile(data_path):
        print(
            f"❌ Error: {data_path} not found "
            "(the path is relative to the current working directory)"
        )
        sys.exit(1)

    # Initialize API
    api = HfApi(token=token)

    try:
        # Create the repository (private)
        print(f"📦 Creating private dataset: {repo_id}")
        repo_url = create_repo(
            repo_id=repo_id,
            token=token,
            repo_type=repo_type,
            private=True,
            exist_ok=True,
        )
        print(f"✅ Repository created/exists: {repo_url}")

        # Upload the JSON file
        print("📤 Uploading leaderboard.json...")
        api.upload_file(
            path_or_fileobj=data_path,
            path_in_repo="leaderboard.json",
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message="Update leaderboard data",
        )
        print("✅ File uploaded successfully!")

        # Create README
        readme_content = """---
license: mit
private: true
---

# Leaderboard Data

This is a private dataset containing benchmark leaderboard data.

## Files

- `leaderboard.json` - Main leaderboard data with model scores across 12 benchmarks

## Structure

The JSON file contains:
- `metadata`: Version, last updated timestamp, counts
- `benchmarks`: Benchmark definitions and metadata
- `models`: Array of model entries with scores

## Usage

This dataset is private and requires authentication to access.

```python
from huggingface_hub import hf_hub_download

file = hf_hub_download(
    repo_id="OpenEvals/leaderboard-data",
    filename="leaderboard.json",
    repo_type="dataset",
    token="your_token"
)
```

Or fetch directly via URL (requires auth token):
```
https://huggingface.co/datasets/OpenEvals/leaderboard-data/resolve/main/leaderboard.json
```
"""

        print("📝 Creating README.md...")
        api.upload_file(
            # upload_file accepts raw bytes, so the README needs no temp file.
            path_or_fileobj=readme_content.encode(),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message="Add README",
        )
        print("✅ README created!")

        print("\n🎉 Success! Dataset is ready at:")
        print(f"   https://huggingface.co/datasets/{repo_id}")
        print("\n📋 Data URL (requires auth):")
        print(
            f"   https://huggingface.co/datasets/{repo_id}/resolve/main/leaderboard.json"
        )

    except Exception as e:
        # Top-level boundary of the script: report any Hub/network failure
        # and signal it through the exit status.
        print(f"❌ Error: {e}")
        sys.exit(1)


# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()