File size: 4,949 Bytes
58e6611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python3
"""
Fetch provider logos from HuggingFace API.

This script:
1. Reads all unique providers from leaderboard.json
2. Fetches avatar URLs from HuggingFace API for each provider
3. Saves the mapping to provider_logos.json
4. Updates leaderboard.json with providerLogoUrl field

Usage:
    python3 scripts/fetch_provider_logos.py
"""

import json
import requests
from pathlib import Path
from typing import Dict, Optional
import time

# Locations of the JSON files this script reads and writes. The "data"
# directory is resolved relative to this file: two parents up, then "data".
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
LEADERBOARD_FILE = DATA_DIR.joinpath("leaderboard.json")
PROVIDER_LOGOS_FILE = DATA_DIR.joinpath("provider_logos.json")


def fetch_avatar_url(provider: str) -> Optional[str]:
    """
    Fetch avatar URL for a provider from HuggingFace API.

    The provider name is percent-encoded before it is placed in the request
    path, so names containing spaces, slashes or other reserved characters
    cannot change which endpoint is queried.

    Args:
        provider: Provider/organization name

    Returns:
        The ``avatarUrl`` field of the JSON response when the request returns
        HTTP 200 and the body is a JSON object. None when the provider name
        is empty, the status code is not 200, the body is not a JSON object,
        or the request/JSON decoding fails.
    """
    # Function-scope import: keeps this change independent of the file-level
    # import block.
    from urllib.parse import quote

    if not provider:
        # Nothing to look up; avoid a pointless network round-trip.
        print("  ⚠️  No avatar found: provider name is empty")
        return None

    # safe="" also escapes "/" so the name always stays a single path segment.
    org_segment = quote(str(provider), safe="")
    url = f"https://huggingface.co/api/organizations/{org_segment}/avatar"

    try:
        response = requests.get(url, timeout=10)

        if response.status_code != 200:
            print(
                f"  ⚠️  No avatar found for {provider} (status {response.status_code})"
            )
            return None

        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP-layer failures;
        # ValueError covers a body that is not valid JSON.
        print(f"  ❌ Error fetching avatar for {provider}: {e}")
        return None

    if not isinstance(data, dict):
        # A JSON array/string/number has no "avatarUrl" field to read.
        print(f"  ❌ Error fetching avatar for {provider}: unexpected response body")
        return None

    return data.get("avatarUrl")


def get_unique_providers(leaderboard_data: dict) -> set:
    """
    Extract unique providers from leaderboard data.

    Only non-empty string values of the ``provider`` key are collected.
    Entries without a provider, or whose provider is ``None``, empty or not a
    string, are skipped: they cannot be looked up, and mixing them with
    strings would make the resulting set unsortable.

    Args:
        leaderboard_data: Parsed leaderboard JSON; its ``models`` value is
            iterated. A missing or ``None`` ``models`` value is treated as an
            empty list.

    Returns:
        Set of distinct provider names.
    """
    # `or []` also covers an explicit `"models": null` in the JSON.
    models = leaderboard_data.get("models") or []
    candidates = (
        model.get("provider") for model in models if isinstance(model, dict)
    )
    return {name for name in candidates if isinstance(name, str) and name}


def fetch_all_provider_logos(providers: set, delay: float = 0.5) -> Dict[str, str]:
    """
    Fetch logos for all providers.

    Providers are processed in sorted order, one request at a time, with
    progress printed for each one.

    Args:
        providers: Set of provider names
        delay: Seconds to wait between consecutive requests. Defaults to 0.5;
            a value of 0 or less disables the pause.

    Returns:
        Dictionary mapping provider name to avatar URL. Providers for which
        no avatar URL was obtained are omitted.
    """
    logo_mapping: Dict[str, str] = {}
    ordered = sorted(providers)
    total = len(ordered)

    print(f"\n🔍 Fetching logos for {total} providers...\n")

    for i, provider in enumerate(ordered, 1):
        print(f"[{i}/{total}] Fetching logo for: {provider}")
        avatar_url = fetch_avatar_url(provider)

        if avatar_url:
            logo_mapping[provider] = avatar_url
            print(f"  ✅ Found: {avatar_url}")

        # Be nice to the API - small delay between requests, but not after
        # the last one. time.sleep() rejects negative values, hence the guard.
        if i < total and delay > 0:
            time.sleep(delay)

    return logo_mapping


def update_leaderboard_with_logos(
    leaderboard_data: dict, logo_mapping: Dict[str, str]
) -> dict:
    """
    Add providerLogoUrl field to each model in leaderboard data.

    The model dictionaries are modified in place; the returned object is the
    same ``leaderboard_data`` that was passed in. Models without a provider,
    or whose provider has no entry in ``logo_mapping``, are left unchanged.

    Args:
        leaderboard_data: Original leaderboard data. A missing or ``None``
            ``models`` value is treated as an empty list.
        logo_mapping: Provider to avatar URL mapping

    Returns:
        Updated leaderboard data
    """
    updated_count = 0

    # `or []` also covers an explicit `"models": null` in the JSON.
    for model in leaderboard_data.get("models") or []:
        provider = model.get("provider")
        if provider and provider in logo_mapping:
            model["providerLogoUrl"] = logo_mapping[provider]
            updated_count += 1

    print(f"\n✅ Updated {updated_count} models with logo URLs")
    return leaderboard_data


def main() -> None:
    """
    Run the logo-fetching pipeline end to end.

    Steps:
    1. Load LEADERBOARD_FILE and collect its unique providers.
    2. Fetch an avatar URL for each provider.
    3. Write the provider -> URL mapping to PROVIDER_LOGOS_FILE.
    4. Add providerLogoUrl to the matching models and overwrite
       LEADERBOARD_FILE with the result.
    5. Print a summary, including the providers for which no logo was found.

    Both JSON files are opened as UTF-8 explicitly, so reading does not
    depend on the platform's default text encoding.
    """
    banner = "=" * 60

    print(banner)
    print("Provider Logo Fetcher for HuggingFace Organizations")
    print(banner)

    # Load leaderboard data
    print(f"\n📖 Loading leaderboard data from {LEADERBOARD_FILE}")
    with open(LEADERBOARD_FILE, "r", encoding="utf-8") as f:
        leaderboard_data = json.load(f)

    # Get unique providers
    providers = get_unique_providers(leaderboard_data)
    print(f"✅ Found {len(providers)} unique providers")

    # Fetch logos from HuggingFace API
    logo_mapping = fetch_all_provider_logos(providers)

    print("\n📊 Summary:")
    print(f"  • Total providers: {len(providers)}")
    print(f"  • Logos fetched: {len(logo_mapping)}")
    print(f"  • Missing logos: {len(providers) - len(logo_mapping)}")

    # Save logo mapping to file (sorted keys keep the file stable across runs)
    print(f"\n💾 Saving logo mapping to {PROVIDER_LOGOS_FILE}")
    with open(PROVIDER_LOGOS_FILE, "w", encoding="utf-8") as f:
        json.dump(logo_mapping, f, indent=2, sort_keys=True)
    print("✅ Logo mapping saved")

    # Update leaderboard data with logo URLs
    print("\n💾 Updating leaderboard data with logo URLs")
    updated_leaderboard = update_leaderboard_with_logos(leaderboard_data, logo_mapping)

    # Save updated leaderboard (overwrites the file that was loaded above)
    with open(LEADERBOARD_FILE, "w", encoding="utf-8") as f:
        json.dump(updated_leaderboard, f, indent=2)
    print(f"✅ Updated leaderboard saved to {LEADERBOARD_FILE}")

    # Show providers with missing logos
    missing_providers = providers - set(logo_mapping)
    if missing_providers:
        print("\n⚠️  Providers without logos:")
        for provider in sorted(missing_providers):
            print(f"  • {provider}")

    print("\n" + banner)
    print("✅ Provider logo fetching complete!")
    print(banner)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()