# nbroad's picture
# Upload folder using huggingface_hub
# dedd63e verified
"""
Fetch chart data from OpenRouter provider pages.
Used for refreshing dashboard data on demand.
"""
import json
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List

import requests
# Provider slugs fetched by default; each one is interpolated into
# https://openrouter.ai/provider/<slug> when its page is requested.
PROVIDERS = [
    "together",
    "baseten",
    "fireworks",
    "novita",
    "groq",
    "nebius",
    "openai",
    "google-vertex",
    "anthropic",
]
def extract_chart_data(url: str) -> List[Dict[str, Any]]:
    """
    Fetch the page and extract chart data from Next.js hydration.

    The page HTML is scanned for ``self.__next_f.push(...)`` calls. Each
    payload that parses as a JSON list with a string in position 1 is then
    searched for an embedded JSON array whose first element is a dict with
    both ``"x"`` and ``"ys"`` keys. The first such array wins.

    Args:
        url: The OpenRouter provider URL

    Returns:
        List of data points with structure: {x: timestamp, ys: {model: tokens, ...}},
        or an empty list when no matching array is found.

    Raises:
        Whatever ``requests.get`` / ``response.raise_for_status`` raise for
        network or HTTP failures; parsing problems never raise.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html = response.text

    # Find all self.__next_f.push() calls
    push_pattern = r'self\.__next_f\.push\((.*?)\)(?=\s*</script>|self\.__next_f\.push)'
    # Candidate arrays start with an object whose first key is a quoted "x"
    # holding a date-like string; the lazy match stops at the first "}]".
    series_pattern = r'\[{["\']x["\']\s*:\s*["\'][\d\-\s:]+["\'].*?}\]'

    for payload in re.findall(push_pattern, html, re.DOTALL):
        try:
            data = json.loads(payload)
        except (json.JSONDecodeError, ValueError):
            continue

        # A payload may decode to a scalar or a dict; len()/data[1] on those
        # would raise TypeError/KeyError, so skip anything that is not a list.
        if not isinstance(data, list) or len(data) < 2:
            continue

        content = data[1]
        if not isinstance(content, str):
            continue

        for candidate in re.findall(series_pattern, content, re.DOTALL):
            try:
                parsed = json.loads(candidate)
            except (json.JSONDecodeError, ValueError):
                # The lazy regex can cut an array short; just try the next one.
                continue

            if not parsed or not isinstance(parsed, list):
                continue
            first = parsed[0]
            if isinstance(first, dict) and "x" in first and "ys" in first:
                return parsed

    return []
def _fetch_provider_data(provider: str) -> Dict[str, Any]:
    """
    Fetch data for a single provider.

    Never raises: any exception from the fetch is captured and reported in
    the returned dict so one failing provider cannot abort a batch.

    Args:
        provider: Provider slug appended to https://openrouter.ai/provider/.

    Returns:
        On success: {"provider", "url", "data", "fetched_at", "success": True}.
        On failure: the same keys with ``data`` set to ``[]``, ``success``
        set to False, and an extra ``"error"`` key holding ``str(exception)``.
    """
    url = f"https://openrouter.ai/provider/{provider}"
    try:
        data = extract_chart_data(url)
    except Exception as e:  # deliberate best-effort boundary; error is reported
        return {
            "provider": provider,
            "url": url,
            "data": [],
            "error": str(e),
            # datetime.utcnow() is deprecated (3.12+); dropping tzinfo keeps
            # the emitted string in the same naive-UTC ISO format as before.
            "fetched_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "success": False,
        }

    return {
        "provider": provider,
        "url": url,
        "data": data,
        "fetched_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "success": True,
    }
def fetch_all_providers(providers: List[str] | None = None, max_workers: int = 5) -> Dict[str, Any]:
    """
    Fetch data from all providers in parallel.

    Args:
        providers: Provider slugs to fetch. ``None`` or an empty list falls
            back to the module-level ``PROVIDERS`` list.
        max_workers: Size of the thread pool used for the fetches.

    Returns:
        Consolidated data structure:
        {"fetched_at": <naive UTC ISO timestamp>,
         "providers": [<per-provider result dicts, sorted by "provider">],
         "summary": {"total_providers", "successful", "failed"}}
    """
    providers = providers or PROVIDERS

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(_fetch_provider_data, p) for p in providers]
        results = [future.result() for future in as_completed(futures)]

    # Completion order is nondeterministic; sort for stable output.
    results.sort(key=lambda x: x["provider"])

    successful = sum(1 for r in results if r["success"])
    return {
        # datetime.utcnow() is deprecated (3.12+); dropping tzinfo keeps the
        # emitted string in the same naive-UTC ISO format as before.
        "fetched_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "providers": results,
        "summary": {
            "total_providers": len(results),
            "successful": successful,
            "failed": sum(1 for r in results if not r["success"]),
        },
    }
def save_data(data: Dict[str, Any], output_dir: Path) -> None:
    """
    Save fetched data to the output directory.

    Three kinds of files are written, in this order:
      * ``full_data.json``            - the whole ``data`` dict
      * ``providers/<provider>.json`` - one file per entry in data["providers"]
      * ``summary.json``              - per-provider stats, or the error
                                        message for unsuccessful entries

    Args:
        data: Dict with a "fetched_at" value and a "providers" list of
            per-provider result dicts.
        output_dir: Target directory; created (with parents) if missing.
    """
    root = Path(output_dir)
    root.mkdir(parents=True, exist_ok=True)
    per_provider_dir = root / "providers"
    per_provider_dir.mkdir(exist_ok=True)

    with open(root / "full_data.json", "w") as fh:
        json.dump(data, fh, indent=2)

    for entry in data["providers"]:
        target = per_provider_dir / f"{entry['provider']}.json"
        with open(target, "w") as fh:
            json.dump(entry, fh, indent=2)

    overview: Dict[str, Any] = {}
    summary = {
        "fetched_at": data["fetched_at"],
        "providers": overview,
    }

    for entry in data["providers"]:
        name = entry["provider"]

        if not entry["success"]:
            overview[name] = {
                "error": entry.get("error", "Unknown error"),
            }
            continue

        points = entry["data"]
        model_names = set().union(*(point["ys"].keys() for point in points))
        token_total = sum(sum(point["ys"].values()) for point in points)

        # First and last points are taken as the range bounds, in stored order.
        if points:
            span = {"start": points[0]["x"], "end": points[-1]["x"]}
        else:
            span = None

        overview[name] = {
            "data_points": len(points),
            "unique_models": len(model_names),
            "total_tokens": token_total,
            "date_range": span,
        }

    with open(root / "summary.json", "w") as fh:
        json.dump(summary, fh, indent=2)
def refresh_data(output_dir: Path) -> Dict[str, Any]:
    """
    Fetch fresh data from all providers and save to output_dir.

    Calls ``fetch_all_providers()`` with its defaults, hands the result to
    ``save_data``, and passes the same dict back to the caller.

    Args:
        output_dir: Directory that ``save_data`` writes into.

    Returns:
        The fetched data dict (with summary) for API response
    """
    fresh = fetch_all_providers()
    save_data(fresh, output_dir)
    return fresh