Spaces:

Real-TSF
/

TIME-leaderboard

Running

App Files Files Community

TIME-leaderboard / src /about.py

zqiao11

Initial release

0b97f6a 6 days ago

raw

history blame contribute delete

5.42 kB

	from dataclasses import dataclass
	from enum import Enum
	from pathlib import Path
	import pandas as pd

	# Import HuggingFace Hub configuration
	from src.hf_config import get_results_root, get_config_root, get_features_root, initialize_data

	from src.utils import (
	get_all_datasets_results, get_all_domains_and_freq, get_all_variates_results,
	get_dataset_choices, get_dataset_display_map, compute_ranks,
	load_features, load_all_features, binarize_features
	)


	# =============================================================================
	# Initialize data from HuggingFace Hub (or local for development)
	# =============================================================================
	print("🚀 Starting TIME Leaderboard initialization...")

	# initialize_data() returns the results root and the config root; per the
	# original note it downloads/caches both from the HuggingFace Hub.
	RESULTS_ROOT, CONFIG_ROOT = initialize_data()

	# Root of the feature files (local or HF, as resolved by get_features_root).
	FEATURES_ROOT = get_features_root()

	# Each sub-directory of the results root is treated as one model.
	# - Path.iterdir() yields entries in arbitrary, filesystem-dependent order,
	#   so the names are sorted to keep the model list stable across runs/hosts.
	# - is_dir() (rather than exists()) also covers the case where the path
	#   exists but is not a directory, which would make iterdir() raise.
	ALL_MODELS = (
	    sorted(p.name for p in RESULTS_ROOT.iterdir() if p.is_dir())
	    if RESULTS_ROOT.is_dir()
	    else []
	)
	print(f"📊 Found {len(ALL_MODELS)} models: {ALL_MODELS}")

	# ---------------------------------------------------
	# Dataset selector entries built from the TIME results tree (with smart
	# display names). Judging by the names, the two extra values map
	# display name -> dataset id and dataset id -> display name.
	(
	    DATASET_CHOICES,
	    DATASET_DISPLAY_TO_ID,
	    DATASET_ID_TO_DISPLAY,
	) = get_dataset_choices(str(RESULTS_ROOT))
	print(f"📁 Found {len(DATASET_CHOICES)} dataset configurations")

	# === Load data once at startup ===
	# Rows: each row is one independent experiment
	# (num_model x num_dataset_id x num_horizons).
	DATASETS_DF = get_all_datasets_results(root_dir=str(RESULTS_ROOT))
	if not DATASETS_DF.empty:
	    # Rank per (dataset_id, horizon). dataset_id is dataset/freq, which
	    # keeps multi-frequency datasets correctly separated when ranking.
	    DATASETS_DF = compute_ranks(
	        DATASETS_DF,
	        groupby_cols=["dataset_id", "horizon"],
	    )
	    print(f"✅ Loaded {len(DATASETS_DF)} dataset results")

	# === Load variate-level results for pattern-based leaderboard ===
	print("📊 Loading variate-level results...")
	VARIATES_DF = get_all_variates_results(root_dir=str(RESULTS_ROOT))
	if VARIATES_DF.empty:
	    # Nothing to rank; keep the empty frame and just report it.
	    print("⚠️ No variate-level results found")
	else:
	    # One ranking group per (dataset_id, series_name, variate_name, horizon).
	    VARIATES_DF = compute_ranks(
	        VARIATES_DF,
	        groupby_cols=["dataset_id", "series_name", "variate_name", "horizon"],
	    )
	    print(f"✅ Loaded {len(VARIATES_DF)} variate-level results")

	# === Load features for pattern-based filtering ===
	print("📊 Loading features...")
	# Only the "test" split of the feature files is requested here.
	FEATURES_DF = load_all_features(
	    features_root=str(FEATURES_ROOT),
	    split="test",
	)
	if FEATURES_DF.empty:
	    print("⚠️ No features found")
	else:
	    print(f"✅ Loaded {len(FEATURES_DF)} variate features")

	# Feature columns that must be left out of binarization.
	BINARIZE_EXCLUDE = [
	    # Identifier columns
	    "dataset_id",
	    "series_name",
	    "variate_name",
	    "unique_id",
	    # Basic descriptive statistics
	    "mean",
	    "std",
	    "length",
	    # Detected periods and their strengths
	    "period1",
	    "period2",
	    "period3",
	    "p_strength1",
	    "p_strength2",
	    "p_strength3",
	    "missing_rate",
	    # Meta features are already 0/1, handle separately
	    "is_random_walk",
	    "has_spike_presence",
	]

	# Boolean view of the numeric features (binarized by median, per the
	# original note). Falls back to an empty frame when no features were loaded.
	if FEATURES_DF.empty:
	    FEATURES_BOOL_DF = pd.DataFrame()
	else:
	    FEATURES_BOOL_DF = binarize_features(
	        FEATURES_DF,
	        exclude=BINARIZE_EXCLUDE,
	    )
	    print(f"✅ Binarized features for {len(FEATURES_BOOL_DF)} variates")


	# Columns of the overall table. The two rank columns are listed only when
	# dataset results were loaded (presumably they are produced by the ranking
	# step, which is skipped for an empty frame).
	OVERALL_TABLE_COLUMNS = ["model", "MASE", "CRPS"]
	if not DATASETS_DF.empty:
	    OVERALL_TABLE_COLUMNS += ["MASE_rank", "CRPS_rank"]


	# Forecast horizon labels.
	ALL_HORIZONS = ["short", "medium", "long"]

	# Lookup from the pattern name shown in the UI to the feature column
	# that backs it.
	PATTERN_MAP = {
	    # --- Trend ---
	    "T_strength": "trend_strength",
	    "T_linearity": "linearity",
	    "T_curvature": "curvature",
	    # --- Seasonality ---
	    "S_strength": "seasonal_strength",
	    "S_complexity": "seasonal_entropy",
	    "S_corr": "seasonal_corr",
	    # --- Residual ---
	    "R_diff1_ACF1": "e_diff1_acf1",
	    "R_ACF1": "e_acf1",
	    # --- Meta ---
	    # Note: stationarity = NOT is_random_walk
	    "stationarity": "is_random_walk",
	    "outlier_presence": "has_spike_presence",
	    # High entropy = low predictability/high noise
	    "complexity": "x_entropy",
	}
	# ---------------------------------------------------


	# Leaderboard title (HTML heading markup).
	TITLE = '<h1 align="center" id="space-title"> It\'s TIME</h1>'

	# Introductory text describing what the leaderboard evaluates.
	INTRODUCTION_TEXT = """
	TIME introduces a unified benchmark for time series probabilistic forecasting that supports evaluation at multiple granularities, ranging from overall performance across datasets to dataset-level, variate-level, and even individual test windows (with visualization). Beyond conventional analysis, the benchmark enables pattern-driven, cross-dataset benchmarking by grouping variates with similar temporal features, where patterns are defined based on groups of tsfeatures that capture properties such as trend, seasonality, and stationarity, offering a more systematic understanding of model behavior. For data and results, please refer to 🤗 [dataset](https://huggingface.co/datasets/TIME-benchmark/TIME-1.0/tree/main).
	"""
	# NOTE: the sentence below is deliberately left out of INTRODUCTION_TEXT:
	# An integrated archive further enriches the platform by providing structural tsfeatures and statistical descriptors of all variates,
	# ensuring both comprehensive evaluation and transparent interpretability across diverse forecasting scenarios
	print("✅ TIME Leaderboard initialization complete!")