""" Fetch chart data from OpenRouter provider pages. Used for refreshing dashboard data on demand. """ import json import re from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from pathlib import Path from typing import Any, Dict, List import requests PROVIDERS = [ "together", "baseten", "fireworks", "novita", "groq", "nebius", "openai", "google-vertex", "anthropic", ] def extract_chart_data(url: str) -> List[Dict[str, Any]]: """ Fetch the page and extract chart data from Next.js hydration. Args: url: The OpenRouter provider URL Returns: List of data points with structure: {x: timestamp, ys: {model: tokens, ...}} """ response = requests.get(url, timeout=30) response.raise_for_status() html = response.text # Find all self.__next_f.push() calls pattern = r'self\.__next_f\.push\((.*?)\)(?=\s*|self\.__next_f\.push)' matches = re.findall(pattern, html, re.DOTALL) chart_data = None for match in matches: try: data = json.loads(match) if len(data) >= 2: content = data[1] if isinstance(content, str): json_array_pattern = r'\[{["\']x["\']\s*:\s*["\'][\d\-\s:]+["\'].*?}\]' json_matches = re.findall(json_array_pattern, content, re.DOTALL) for json_match in json_matches: try: parsed = json.loads(json_match) if parsed and isinstance(parsed, list) and len(parsed) > 0: if isinstance(parsed[0], dict) and "x" in parsed[0] and "ys" in parsed[0]: chart_data = parsed break except (json.JSONDecodeError, ValueError): continue if chart_data: break except (json.JSONDecodeError, ValueError): continue return chart_data or [] def _fetch_provider_data(provider: str) -> Dict[str, Any]: """Fetch data for a single provider.""" url = f"https://openrouter.ai/provider/{provider}" try: data = extract_chart_data(url) return { "provider": provider, "url": url, "data": data, "fetched_at": datetime.utcnow().isoformat(), "success": True, } except Exception as e: return { "provider": provider, "url": url, "data": [], "error": str(e), "fetched_at": datetime.utcnow().isoformat(), "success": False, } def fetch_all_providers(providers: List[str] | None = None, max_workers: int = 5) -> Dict[str, Any]: """ Fetch data from all providers in parallel. Returns: Consolidated data structure """ providers = providers or PROVIDERS results = [] with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_provider = { executor.submit(_fetch_provider_data, p): p for p in providers } for future in as_completed(future_to_provider): results.append(future.result()) results.sort(key=lambda x: x["provider"]) return { "fetched_at": datetime.utcnow().isoformat(), "providers": results, "summary": { "total_providers": len(results), "successful": sum(1 for r in results if r["success"]), "failed": sum(1 for r in results if not r["success"]), }, } def save_data(data: Dict[str, Any], output_dir: Path) -> None: """Save fetched data to the output directory.""" output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) providers_path = output_path / "providers" providers_path.mkdir(exist_ok=True) with open(output_path / "full_data.json", "w") as f: json.dump(data, f, indent=2) for provider_data in data["providers"]: provider_file = providers_path / f"{provider_data['provider']}.json" with open(provider_file, "w") as f: json.dump(provider_data, f, indent=2) summary = { "fetched_at": data["fetched_at"], "providers": {}, } for provider_data in data["providers"]: if provider_data["success"]: models = set() total_tokens = 0 date_range = None for point in provider_data["data"]: models.update(point["ys"].keys()) total_tokens += sum(point["ys"].values()) if provider_data["data"]: date_range = { "start": provider_data["data"][0]["x"], "end": provider_data["data"][-1]["x"], } summary["providers"][provider_data["provider"]] = { "data_points": len(provider_data["data"]), "unique_models": len(models), "total_tokens": total_tokens, "date_range": date_range, } else: summary["providers"][provider_data["provider"]] = { "error": provider_data.get("error", "Unknown error"), } with open(output_path / "summary.json", "w") as f: json.dump(summary, f, indent=2) def refresh_data(output_dir: Path) -> Dict[str, Any]: """ Fetch fresh data from all providers and save to output_dir. Returns: The fetched data dict (with summary) for API response """ data = fetch_all_providers() save_data(data, output_dir) return data