#!/usr/bin/env python3
"""
Fetch provider logos from HuggingFace API.
This script:
1. Reads all unique providers from leaderboard.json
2. Fetches avatar URLs from HuggingFace API for each provider
3. Saves the mapping to provider_logos.json
4. Updates leaderboard.json with providerLogoUrl field
Usage:
python3 scripts/fetch_provider_logos.py
"""
import json
import time
from pathlib import Path
from typing import Dict, Optional
from urllib.parse import quote

import requests
# File paths, built from this script's own location (__file__) rather than
# the current working directory.
# DATA_DIR is the "data" directory beside the directory that holds this script.
DATA_DIR = Path(__file__).parent.parent / "data"
# Leaderboard JSON: main() reads it and later writes it back with logo URLs.
LEADERBOARD_FILE = DATA_DIR / "leaderboard.json"
# Provider -> avatar URL mapping written by main().
PROVIDER_LOGOS_FILE = DATA_DIR / "provider_logos.json"
def fetch_avatar_url(provider: str) -> Optional[str]:
    """
    Fetch avatar URL for a provider from HuggingFace API.

    Requests ``https://huggingface.co/api/organizations/<provider>/avatar``
    and returns the ``avatarUrl`` field of the JSON response. Every failure
    is reported on stdout and converted to ``None`` so that a single bad
    provider does not abort a batch run.

    Args:
        provider: Provider/organization name

    Returns:
        Avatar URL if found, None otherwise (empty name, non-200 status,
        network error, or a response body that is not a JSON object)
    """
    if not provider:
        # An empty name would produce a malformed endpoint URL; skip the call.
        return None

    # Percent-encode the name so spaces, "/", "?" or "#" cannot change the
    # request path.
    encoded_name = quote(str(provider), safe="")
    url = f"https://huggingface.co/api/organizations/{encoded_name}/avatar"

    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(
                f" ⚠️ No avatar found for {provider} (status {response.status_code})"
            )
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout errors; ValueError covers
        # a body that is not valid JSON.
        print(f" ❌ Error fetching avatar for {provider}: {e}")
        return None

    if not isinstance(data, dict):
        print(f" ❌ Error fetching avatar for {provider}: unexpected response body")
        return None
    return data.get("avatarUrl")
def get_unique_providers(leaderboard_data: dict) -> set:
    """Extract unique providers from leaderboard data.

    Only non-empty string values of the ``provider`` key are collected.
    Missing, ``None`` or empty providers are skipped, which matches
    ``update_leaderboard_with_logos`` (it ignores falsy providers) and keeps
    the result sortable and usable as part of a URL.

    Args:
        leaderboard_data: Parsed leaderboard JSON; its ``models`` entry is
            read as a list of model dicts. A missing or ``None`` entry is
            treated as an empty list.

    Returns:
        Set of provider names
    """
    models = leaderboard_data.get("models") or []
    return {
        model["provider"]
        for model in models
        if isinstance(model, dict)
        and isinstance(model.get("provider"), str)
        and model["provider"]
    }
def fetch_all_provider_logos(providers: set, *, delay: float = 0.5) -> Dict[str, str]:
    """
    Fetch logos for all providers.

    Providers are processed in sorted order, one request each, with progress
    printed to stdout. Providers for which no avatar URL is returned are left
    out of the result.

    Args:
        providers: Set of provider names
        delay: Seconds to pause between consecutive requests (default 0.5);
            zero or a negative value disables the pause

    Returns:
        Dictionary mapping provider name to avatar URL
    """
    logo_mapping: Dict[str, str] = {}
    total = len(providers)
    print(f"\n🔍 Fetching logos for {total} providers...\n")

    for i, provider in enumerate(sorted(providers), 1):
        print(f"[{i}/{total}] Fetching logo for: {provider}")
        avatar_url = fetch_avatar_url(provider)
        if avatar_url:
            logo_mapping[provider] = avatar_url
            print(f" ✅ Found: {avatar_url}")
        # Be nice to the API - small delay between requests (none after the
        # last one).
        if i < total and delay > 0:
            time.sleep(delay)

    return logo_mapping
def update_leaderboard_with_logos(
    leaderboard_data: dict, logo_mapping: Dict[str, str]
) -> dict:
    """
    Add providerLogoUrl field to each model in leaderboard data.

    The model dicts are modified in place and the same ``leaderboard_data``
    object is returned. Models with no provider, or whose provider has no
    entry in ``logo_mapping``, are left untouched. The number of updated
    models is printed to stdout.

    Args:
        leaderboard_data: Original leaderboard data; a missing or ``None``
            ``models`` entry is treated as an empty list
        logo_mapping: Provider to avatar URL mapping

    Returns:
        Updated leaderboard data
    """
    updated_count = 0
    for model in leaderboard_data.get("models") or []:
        provider = model.get("provider")
        if provider and provider in logo_mapping:
            model["providerLogoUrl"] = logo_mapping[provider]
            updated_count += 1

    print(f"\n✅ Updated {updated_count} models with logo URLs")
    return leaderboard_data
def main():
    """Run the full logo-fetching workflow.

    Steps:
      1. Load the leaderboard JSON from LEADERBOARD_FILE.
      2. Collect the unique providers and fetch an avatar URL for each.
      3. Write the provider -> URL mapping to PROVIDER_LOGOS_FILE.
      4. Add ``providerLogoUrl`` to the matching models and overwrite
         LEADERBOARD_FILE with the result.
      5. Print a summary, including the providers for which no logo was found.
    """
    print("=" * 60)
    print("Provider Logo Fetcher for HuggingFace Organizations")
    print("=" * 60)

    # Load leaderboard data. The encoding is explicit so that reading does not
    # depend on the platform's default locale encoding.
    print(f"\n📂 Loading leaderboard data from {LEADERBOARD_FILE}")
    with open(LEADERBOARD_FILE, "r", encoding="utf-8") as f:
        leaderboard_data = json.load(f)

    # Get unique providers
    providers = get_unique_providers(leaderboard_data)
    print(f"✅ Found {len(providers)} unique providers")

    # Fetch logos from HuggingFace API
    logo_mapping = fetch_all_provider_logos(providers)
    print("\n📊 Summary:")
    print(f" • Total providers: {len(providers)}")
    print(f" • Logos fetched: {len(logo_mapping)}")
    print(f" • Missing logos: {len(providers) - len(logo_mapping)}")

    # Save logo mapping to file
    print(f"\n💾 Saving logo mapping to {PROVIDER_LOGOS_FILE}")
    with open(PROVIDER_LOGOS_FILE, "w", encoding="utf-8") as f:
        json.dump(logo_mapping, f, indent=2, sort_keys=True)
    print("✅ Logo mapping saved")

    # Update leaderboard data with logo URLs
    print("\n💾 Updating leaderboard data with logo URLs")
    updated_leaderboard = update_leaderboard_with_logos(leaderboard_data, logo_mapping)

    # Save updated leaderboard (overwrites the input file)
    with open(LEADERBOARD_FILE, "w", encoding="utf-8") as f:
        json.dump(updated_leaderboard, f, indent=2)
    print(f"✅ Updated leaderboard saved to {LEADERBOARD_FILE}")

    # Show providers with missing logos
    missing_providers = providers - set(logo_mapping)
    if missing_providers:
        print("\n⚠️ Providers without logos:")
        for provider in sorted(missing_providers):
            print(f" • {provider}")

    print("\n" + "=" * 60)
    print("✅ Provider logo fetching complete!")
    print("=" * 60)
# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()