File size: 2,933 Bytes
24f95f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Catalog loader for public-apis dataset.
"""

import json
import logging
from typing import List, Dict, Any, Optional
from pathlib import Path
import httpx

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# GitHub raw URL for public-apis catalog
PUBLIC_APIS_URL = "https://raw.githubusercontent.com/public-apis/public-apis/master/entries.json"
# Prefer the data directory configured by the application. When app.config
# cannot be imported, fall back to "<this file's directory>/../../data".
try:
    from app.config import DATA_DIR
except ImportError:
    DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"

# Location of the on-disk JSON copy of the catalog. Wrapping DATA_DIR in Path()
# yields a Path whether the configured value is a str or already a Path.
CACHE_FILE = Path(DATA_DIR) / "api_catalog_cache.json"


def _entries_from_payload(payload: Any) -> List[Dict[str, Any]]:
    """Return the ``entries`` list of a catalog payload, or ``[]`` if absent or malformed."""
    if not isinstance(payload, dict):
        return []
    entries = payload.get("entries")
    return entries if isinstance(entries, list) else []


def load_public_apis_catalog(use_cache: bool = True) -> List[Dict[str, Any]]:
    """
    Load public-apis catalog from GitHub or local cache.

    The cache file is consulted first (when allowed). A cache that cannot be
    read, is malformed, or contains no entries is treated as a miss and the
    catalog is fetched from ``PUBLIC_APIS_URL`` instead. A fetched catalog is
    written back to the cache only when it actually contains entries, so a
    bad upstream response is never persisted and served on later calls.

    Args:
        use_cache: If True, use local cache if available

    Returns:
        List of API entries as published by the upstream dataset, with:
        - API: API name
        - Description: API description
        - Auth: Auth type (apiKey, OAuth, None, etc.)
        - HTTPS: HTTPS support (True/False)
        - Cors: CORS support (yes/no/unknown)
        - Category: API category
        - Link: API documentation URL

        An empty list is returned when neither the cache nor GitHub yields
        any entries. This function does not raise on I/O or network errors;
        they are logged and reported as an empty result.
    """
    # Try to load from cache first
    if use_cache and CACHE_FILE.exists():
        try:
            logger.info("Loading API catalog from cache")
            with open(CACHE_FILE, "r", encoding="utf-8") as f:
                cached_entries = _entries_from_payload(json.load(f))
        except Exception as e:  # best-effort: any cache problem falls back to fetching
            logger.warning("Failed to load cache: %s", e)
        else:
            if cached_entries:
                logger.info("Loaded %d APIs from cache", len(cached_entries))
                return cached_entries
            # An empty cache would otherwise be returned forever; refetch instead.
            logger.warning("Cached API catalog has no entries; refetching")

    # Fetch from GitHub
    try:
        logger.info("Fetching API catalog from GitHub")
        with httpx.Client(timeout=30) as client:
            response = client.get(PUBLIC_APIS_URL)
            response.raise_for_status()
            data = response.json()
    except Exception as e:  # best-effort: network/HTTP/JSON failures yield []
        logger.error("Failed to fetch API catalog: %s", e)
        return []

    entries = _entries_from_payload(data)
    logger.info("Fetched %d APIs from GitHub", len(entries))

    if not entries:
        # Do not persist an empty or malformed payload: it would mask the
        # real catalog on every subsequent cached load.
        logger.warning("Fetched API catalog has no entries; not caching")
        return entries

    # Save to cache
    try:
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        logger.info("Saved API catalog to cache")
    except Exception as e:  # a failed cache write must not lose the fetched data
        logger.warning("Failed to save cache: %s", e)

    return entries


def get_api_by_name(name: str) -> Optional[Dict[str, Any]]:
    """
    Get a specific API by name.

    The comparison is case-insensitive against each entry's ``API`` field.
    Entries whose ``API`` value is missing or not a string are skipped
    rather than raising.

    Args:
        name: API name to look for.

    Returns:
        The first matching catalog entry, or None if no entry matches.
    """
    wanted = name.lower()  # hoisted: invariant across the loop
    for api in load_public_apis_catalog():
        api_name = api.get("API")
        # The catalog is external JSON; the field may be null or non-string.
        if isinstance(api_name, str) and api_name.lower() == wanted:
            return api
    return None


def get_apis_by_category(category: str) -> List[Dict[str, Any]]:
    """
    Get all APIs in a specific category.

    The comparison is case-insensitive against each entry's ``Category``
    field. Entries whose ``Category`` value is missing or not a string are
    skipped rather than raising.

    Args:
        category: Category name to filter by.

    Returns:
        All matching catalog entries in catalog order; empty if none match.
    """
    wanted = category.lower()  # hoisted: invariant across the comprehension
    return [
        api
        for api in load_public_apis_catalog()
        # The catalog is external JSON; the field may be null or non-string.
        if isinstance(api.get("Category"), str) and api["Category"].lower() == wanted
    ]