every-leaderboards / scripts /fetch_provider_logos.py
Linker1907's picture
Add official benchmarks leaderboard with OAuth support
58e6611
#!/usr/bin/env python3
"""
Fetch provider logos from HuggingFace API.
This script:
1. Reads all unique providers from leaderboard.json
2. Fetches avatar URLs from HuggingFace API for each provider
3. Saves the mapping to provider_logos.json
4. Updates leaderboard.json with providerLogoUrl field
Usage:
python3 scripts/fetch_provider_logos.py
"""
import json
import requests
from pathlib import Path
from typing import Dict, Optional
import time
# Data file locations, anchored at the parent of this script's directory.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
LEADERBOARD_FILE = DATA_DIR.joinpath("leaderboard.json")
PROVIDER_LOGOS_FILE = DATA_DIR.joinpath("provider_logos.json")
def fetch_avatar_url(provider: str) -> Optional[str]:
    """
    Fetch avatar URL for a provider from HuggingFace API.

    Args:
        provider: Provider/organization name

    Returns:
        Avatar URL if found, None otherwise (non-200 response, request
        failure, or a response body that is not a JSON object)
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    from urllib.parse import quote

    # Percent-encode the name so characters such as "/", "?", "#" or spaces
    # cannot alter which endpoint is requested.
    org = quote(str(provider), safe="")
    url = f"https://huggingface.co/api/organizations/{org}/avatar"

    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(
                f" ⚠️ No avatar found for {provider} (status {response.status_code})"
            )
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection, timeout and other request failures.
        # ValueError: the body could not be decoded as JSON.
        print(f" ❌ Error fetching avatar for {provider}: {e}")
        return None

    # Only a JSON object can carry the "avatarUrl" key.
    if not isinstance(data, dict):
        print(f" ⚠️ Unexpected avatar response for {provider}")
        return None
    return data.get("avatarUrl")
def get_unique_providers(leaderboard_data: dict) -> set:
    """
    Extract unique providers from leaderboard data.

    Only non-empty string providers are collected. Models whose "provider"
    is missing, null, empty or not a string are skipped: such values cannot
    name an organization, and mixing them into the set would make it
    unsortable.

    Args:
        leaderboard_data: Leaderboard data; models are read from its
            "models" entry (a missing or null entry means no models)

    Returns:
        Set of provider names
    """
    providers = set()
    for model in leaderboard_data.get("models") or []:
        if not isinstance(model, dict):
            continue
        provider = model.get("provider")
        if isinstance(provider, str) and provider:
            providers.add(provider)
    return providers
def fetch_all_provider_logos(providers: set, delay: float = 0.5) -> Dict[str, str]:
    """
    Fetch logos for all providers.

    Args:
        providers: Set of provider names
        delay: Seconds to pause between consecutive requests (default 0.5);
            a value of 0 or less disables the pause

    Returns:
        Dictionary mapping provider name to avatar URL; providers for which
        no avatar URL was obtained are omitted
    """
    logo_mapping = {}
    total = len(providers)
    print(f"\n🔍 Fetching logos for {total} providers...\n")

    # Providers are processed in sorted order, numbered from 1 for the
    # progress output.
    for i, provider in enumerate(sorted(providers), 1):
        print(f"[{i}/{total}] Fetching logo for: {provider}")
        avatar_url = fetch_avatar_url(provider)
        if avatar_url:
            logo_mapping[provider] = avatar_url
            print(f" ✅ Found: {avatar_url}")

        # Be nice to the API - small delay between requests, none after the
        # last one. time.sleep() rejects negative values, hence the guard.
        if i < total and delay > 0:
            time.sleep(delay)
    return logo_mapping
def update_leaderboard_with_logos(
    leaderboard_data: dict, logo_mapping: Dict[str, str]
) -> dict:
    """
    Add providerLogoUrl field to each model in leaderboard data.

    Models are modified in place and the same leaderboard_data object is
    returned. A model is left untouched when its provider is missing, empty,
    not a string, or has no entry in logo_mapping.

    Args:
        leaderboard_data: Original leaderboard data
        logo_mapping: Provider to avatar URL mapping

    Returns:
        Updated leaderboard data (the object passed in)
    """
    updated_count = 0
    # A missing or null "models" entry is treated as an empty list.
    for model in leaderboard_data.get("models") or []:
        if not isinstance(model, dict):
            continue
        provider = model.get("provider")
        # The str check also keeps unhashable values (e.g. lists) away from
        # the dict membership test, which would raise TypeError.
        if isinstance(provider, str) and provider and provider in logo_mapping:
            model["providerLogoUrl"] = logo_mapping[provider]
            updated_count += 1
    print(f"\n✅ Updated {updated_count} models with logo URLs")
    return leaderboard_data
def main():
    """
    Run the logo fetch end to end.

    Loads leaderboard.json, fetches an avatar URL for every unique provider,
    writes the provider-to-URL mapping to provider_logos.json, and rewrites
    leaderboard.json with a providerLogoUrl field on each matched model.
    Progress and a summary are printed to stdout.
    """
    print("=" * 60)
    print("Provider Logo Fetcher for HuggingFace Organizations")
    print("=" * 60)

    # Load leaderboard data. The encoding is given explicitly so reading does
    # not depend on the platform's default locale encoding.
    print(f"\n📖 Loading leaderboard data from {LEADERBOARD_FILE}")
    with open(LEADERBOARD_FILE, "r", encoding="utf-8") as f:
        leaderboard_data = json.load(f)

    # Get unique providers
    providers = get_unique_providers(leaderboard_data)
    print(f"✅ Found {len(providers)} unique providers")

    # Fetch logos from HuggingFace API
    logo_mapping = fetch_all_provider_logos(providers)

    print("\n📊 Summary:")
    print(f" • Total providers: {len(providers)}")
    print(f" • Logos fetched: {len(logo_mapping)}")
    print(f" • Missing logos: {len(providers) - len(logo_mapping)}")

    # Save logo mapping to file
    print(f"\n💾 Saving logo mapping to {PROVIDER_LOGOS_FILE}")
    with open(PROVIDER_LOGOS_FILE, "w", encoding="utf-8") as f:
        json.dump(logo_mapping, f, indent=2, sort_keys=True)
    print("✅ Logo mapping saved")

    # Update leaderboard data with logo URLs
    print("\n💾 Updating leaderboard data with logo URLs")
    updated_leaderboard = update_leaderboard_with_logos(leaderboard_data, logo_mapping)

    # Save updated leaderboard (overwrites the input file)
    with open(LEADERBOARD_FILE, "w", encoding="utf-8") as f:
        json.dump(updated_leaderboard, f, indent=2)
    print(f"✅ Updated leaderboard saved to {LEADERBOARD_FILE}")

    # Show providers with missing logos
    missing_providers = providers.difference(logo_mapping)
    if missing_providers:
        print("\n⚠️ Providers without logos:")
        for provider in sorted(missing_providers):
            print(f" • {provider}")

    print("\n" + "=" * 60)
    print("✅ Provider logo fetching complete!")
    print("=" * 60)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()