#!/usr/bin/env python3
"""
Fetch provider logos from HuggingFace API.

This script:
1. Reads all unique providers from leaderboard.json
2. Fetches avatar URLs from HuggingFace API for each provider
3. Saves the mapping to provider_logos.json
4. Updates leaderboard.json with providerLogoUrl field

Usage:
    python3 scripts/fetch_provider_logos.py
"""

import json
import time
from pathlib import Path
from typing import Dict, Optional
from urllib.parse import quote

import requests

# File paths
DATA_DIR = Path(__file__).parent.parent / "data"
LEADERBOARD_FILE = DATA_DIR / "leaderboard.json"
PROVIDER_LOGOS_FILE = DATA_DIR / "provider_logos.json"

# Network settings
AVATAR_API_URL = "https://huggingface.co/api/organizations/{provider}/avatar"
REQUEST_TIMEOUT_SECONDS = 10
REQUEST_DELAY_SECONDS = 0.5


def fetch_avatar_url(provider: str) -> Optional[str]:
    """
    Fetch avatar URL for a provider from HuggingFace API.

    Failures are reported on stdout and turned into ``None`` so that one
    bad provider does not abort the whole run.

    Args:
        provider: Provider/organization name

    Returns:
        Avatar URL if found, None otherwise
    """
    # Percent-encode the name so characters such as "/" or " " cannot alter
    # the request path.
    url = AVATAR_API_URL.format(provider=quote(str(provider), safe=""))

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            print(
                f" ⚠️ No avatar found for {provider} (status {response.status_code})"
            )
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/HTTP failures; ValueError covers a
        # body that is not valid JSON.
        print(f" ❌ Error fetching avatar for {provider}: {e}")
        return None

    if not isinstance(data, dict):
        print(
            f" ❌ Error fetching avatar for {provider}: "
            f"unexpected response type {type(data).__name__}"
        )
        return None

    return data.get("avatarUrl")


def get_unique_providers(leaderboard_data: dict) -> set:
    """
    Extract unique providers from leaderboard data.

    Models without a provider, or with an empty/None provider, are skipped.
    This matches update_leaderboard_with_logos, which ignores such models,
    and keeps the resulting set sortable.

    Args:
        leaderboard_data: Leaderboard data with a "models" list

    Returns:
        Set of provider names
    """
    return {
        model["provider"]
        for model in leaderboard_data.get("models", [])
        if model.get("provider")
    }


def fetch_all_provider_logos(
    providers: set, delay: float = REQUEST_DELAY_SECONDS
) -> Dict[str, str]:
    """
    Fetch logos for all providers.

    Args:
        providers: Set of provider names
        delay: Seconds to wait between consecutive requests

    Returns:
        Dictionary mapping provider name to avatar URL. Providers for which
        no avatar could be fetched are omitted.
    """
    logo_mapping = {}
    total = len(providers)

    print(f"\n🔍 Fetching logos for {total} providers...\n")

    # Sorted so progress output is in a stable, predictable order.
    for i, provider in enumerate(sorted(providers), 1):
        print(f"[{i}/{total}] Fetching logo for: {provider}")

        avatar_url = fetch_avatar_url(provider)
        if avatar_url:
            logo_mapping[provider] = avatar_url
            print(f" ✅ Found: {avatar_url}")

        # Be nice to the API - small delay between requests
        # (no need to wait after the last one).
        if i < total and delay > 0:
            time.sleep(delay)

    return logo_mapping


def update_leaderboard_with_logos(
    leaderboard_data: dict, logo_mapping: Dict[str, str]
) -> dict:
    """
    Add providerLogoUrl field to each model in leaderboard data.

    The model dictionaries are modified in place. Models whose provider has
    no entry in logo_mapping are left untouched.

    Args:
        leaderboard_data: Original leaderboard data
        logo_mapping: Provider to avatar URL mapping

    Returns:
        Updated leaderboard data (the same object that was passed in)
    """
    updated_count = 0

    for model in leaderboard_data.get("models", []):
        provider = model.get("provider")
        if provider and provider in logo_mapping:
            model["providerLogoUrl"] = logo_mapping[provider]
            updated_count += 1

    print(f"\n✅ Updated {updated_count} models with logo URLs")
    return leaderboard_data


def main():
    """Run the full pipeline: load, fetch logos, save mapping, update leaderboard."""
    print("=" * 60)
    print("Provider Logo Fetcher for HuggingFace Organizations")
    print("=" * 60)

    # Load leaderboard data
    print(f"\n📖 Loading leaderboard data from {LEADERBOARD_FILE}")
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(LEADERBOARD_FILE, "r", encoding="utf-8") as f:
        leaderboard_data = json.load(f)

    # Get unique providers
    providers = get_unique_providers(leaderboard_data)
    print(f"✅ Found {len(providers)} unique providers")

    # Fetch logos from HuggingFace API
    logo_mapping = fetch_all_provider_logos(providers)

    print("\n📊 Summary:")
    print(f" • Total providers: {len(providers)}")
    print(f" • Logos fetched: {len(logo_mapping)}")
    print(f" • Missing logos: {len(providers) - len(logo_mapping)}")

    # Save logo mapping to file
    print(f"\n💾 Saving logo mapping to {PROVIDER_LOGOS_FILE}")
    with open(PROVIDER_LOGOS_FILE, "w", encoding="utf-8") as f:
        json.dump(logo_mapping, f, indent=2, sort_keys=True)
    print("✅ Logo mapping saved")

    # Update leaderboard data with logo URLs
    print("\n💾 Updating leaderboard data with logo URLs")
    updated_leaderboard = update_leaderboard_with_logos(leaderboard_data, logo_mapping)

    # Save updated leaderboard
    with open(LEADERBOARD_FILE, "w", encoding="utf-8") as f:
        json.dump(updated_leaderboard, f, indent=2)
    print(f"✅ Updated leaderboard saved to {LEADERBOARD_FILE}")

    # Show providers with missing logos
    missing_providers = providers - set(logo_mapping)
    if missing_providers:
        print("\n⚠️ Providers without logos:")
        for provider in sorted(missing_providers):
            print(f" • {provider}")

    print("\n" + "=" * 60)
    print("✅ Provider logo fetching complete!")
    print("=" * 60)


if __name__ == "__main__":
    main()