Spaces:
Sleeping
Sleeping
| """ | |
| Fetch chart data from OpenRouter provider pages. | |
| Used for refreshing dashboard data on demand. | |
| """ | |
| import json | |
| import re | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Any, Dict, List | |
| import requests | |
# Provider slugs fetched by default; each one is appended to the
# https://openrouter.ai/provider/ base URL when a provider page is requested.
PROVIDERS: List[str] = [
    "together", "baseten", "fireworks",
    "novita", "groq", "nebius",
    "openai", "google-vertex", "anthropic",
]
def extract_chart_data(url: str) -> List[Dict[str, Any]]:
    """
    Fetch the page and extract chart data from Next.js hydration.

    Args:
        url: The OpenRouter provider URL

    Returns:
        List of data points with structure: {x: timestamp, ys: {model: tokens, ...}},
        or an empty list when no chart payload is found in the page.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` for an error response.
        Any other exception raised by ``requests.get`` propagates unchanged.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return _find_chart_data(response.text)


def _find_chart_data(html: str) -> List[Dict[str, Any]]:
    """Return the first chart series found in the page's hydration payloads, or []."""
    # Each self.__next_f.push(...) call carries one JSON-encoded hydration chunk.
    push_pattern = r'self\.__next_f\.push\((.*?)\)(?=\s*</script>|self\.__next_f\.push)'
    # Candidate series: an array of objects whose first key is a date-like "x".
    json_array_pattern = r'\[{["\']x["\']\s*:\s*["\'][\d\-\s:]+["\'].*?}\]'

    for payload in re.findall(push_pattern, html, re.DOTALL):
        try:
            data = json.loads(payload)
        except (json.JSONDecodeError, ValueError):
            continue

        # Only [<id>, <string>] shaped chunks can hold chart data. Skipping other
        # shapes explicitly keeps a stray number or object payload from raising
        # TypeError/KeyError and aborting the whole extraction.
        if not isinstance(data, list) or len(data) < 2:
            continue
        content = data[1]
        if not isinstance(content, str):
            continue

        for candidate in re.findall(json_array_pattern, content, re.DOTALL):
            try:
                parsed = json.loads(candidate)
            except (json.JSONDecodeError, ValueError):
                # The non-greedy regex can cut a candidate short; try the next one.
                continue
            if not (isinstance(parsed, list) and parsed):
                continue
            first_point = parsed[0]
            if isinstance(first_point, dict) and "x" in first_point and "ys" in first_point:
                return parsed

    return []
def _fetch_provider_data(provider: str) -> Dict[str, Any]:
    """Fetch data for a single provider.

    Args:
        provider: Provider slug appended to the OpenRouter provider URL.

    Returns:
        A record with ``provider``, ``url``, ``data``, ``fetched_at`` and
        ``success`` keys. When extraction raises, ``data`` is an empty list,
        ``success`` is False and an ``error`` key holds the exception text,
        so one failing provider never aborts a batch.
    """
    # Function-scope import: the module header only imports ``datetime``.
    from datetime import timezone

    def _utc_timestamp() -> str:
        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
        # keeps the emitted string in the same naive-UTC ISO format as before.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    url = f"https://openrouter.ai/provider/{provider}"
    try:
        data = extract_chart_data(url)
    except Exception as e:
        # Deliberate catch-all boundary: report the failure in the record.
        return {
            "provider": provider,
            "url": url,
            "data": [],
            "error": str(e),
            "fetched_at": _utc_timestamp(),
            "success": False,
        }
    return {
        "provider": provider,
        "url": url,
        "data": data,
        "fetched_at": _utc_timestamp(),
        "success": True,
    }
| def fetch_all_providers(providers: List[str] | None = None, max_workers: int = 5) -> Dict[str, Any]: | |
| """ | |
| Fetch data from all providers in parallel. | |
| Returns: | |
| Consolidated data structure | |
| """ | |
| providers = providers or PROVIDERS | |
| results = [] | |
| with ThreadPoolExecutor(max_workers=max_workers) as executor: | |
| future_to_provider = { | |
| executor.submit(_fetch_provider_data, p): p for p in providers | |
| } | |
| for future in as_completed(future_to_provider): | |
| results.append(future.result()) | |
| results.sort(key=lambda x: x["provider"]) | |
| return { | |
| "fetched_at": datetime.utcnow().isoformat(), | |
| "providers": results, | |
| "summary": { | |
| "total_providers": len(results), | |
| "successful": sum(1 for r in results if r["success"]), | |
| "failed": sum(1 for r in results if not r["success"]), | |
| }, | |
| } | |
def save_data(data: Dict[str, Any], output_dir: Path) -> None:
    """Write the fetched data set into ``output_dir`` as JSON files.

    Three outputs are produced, in this order:
      * ``full_data.json`` - the complete ``data`` dict,
      * ``providers/<provider>.json`` - one file per provider record,
      * ``summary.json`` - per-provider point count, model count, token total
        and first/last ``x`` value, or the error text for failed providers.

    The directory (and its parents) is created when missing.
    """
    root = Path(output_dir)
    root.mkdir(parents=True, exist_ok=True)
    per_provider_dir = root / "providers"
    per_provider_dir.mkdir(exist_ok=True)

    def _dump(payload: Any, target: Path) -> None:
        # Every output file shares the same 2-space indented JSON layout.
        with open(target, "w") as handle:
            json.dump(payload, handle, indent=2)

    _dump(data, root / "full_data.json")

    for entry in data["providers"]:
        _dump(entry, per_provider_dir / f"{entry['provider']}.json")

    fetched_at = data["fetched_at"]
    overview: Dict[str, Any] = {}
    for entry in data["providers"]:
        if not entry["success"]:
            overview[entry["provider"]] = {
                "error": entry.get("error", "Unknown error"),
            }
            continue

        points = entry["data"]
        model_names = set()
        token_sum = 0
        for point in points:
            series = point["ys"]
            model_names.update(series.keys())
            token_sum += sum(series.values())

        # The range is simply the first and last points as stored.
        span = None
        if points:
            span = {"start": points[0]["x"], "end": points[-1]["x"]}

        overview[entry["provider"]] = {
            "data_points": len(points),
            "unique_models": len(model_names),
            "total_tokens": token_sum,
            "date_range": span,
        }

    _dump({"fetched_at": fetched_at, "providers": overview}, root / "summary.json")
def refresh_data(output_dir: Path) -> Dict[str, Any]:
    """
    Run one full refresh: fetch every default provider, then persist the result.

    Args:
        output_dir: Directory handed to ``save_data`` for the JSON output.

    Returns:
        The dict produced by ``fetch_all_providers`` (including its summary),
        so the caller can use it directly as an API response.
    """
    fresh = fetch_all_providers()
    save_data(fresh, output_dir)
    return fresh