# NOTE(review): the lines below replaced scraping residue from the hosting page
# (HF Space status "Running", file size 5,423 bytes, commit 0b97f6a, and a
# line-number gutter). They were not valid Python and are preserved here only
# as a comment for provenance.
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
import pandas as pd
# Import HuggingFace Hub configuration
from src.hf_config import get_results_root, get_config_root, get_features_root, initialize_data
from src.utils import (
get_all_datasets_results, get_all_domains_and_freq, get_all_variates_results,
get_dataset_choices, get_dataset_display_map, compute_ranks,
load_features, load_all_features, binarize_features
)
# =============================================================================
# Initialize data from HuggingFace Hub (or local for development)
# =============================================================================
# NOTE(review): emoji literals throughout this file appear mojibake-encoded
# (e.g. "๐"); they are runtime strings, so their bytes are left untouched.
print("๐ Starting TIME Leaderboard initialization...")

# Download/cache results and config from HuggingFace Hub, or reuse a local
# copy for development — initialize_data() decides which.
RESULTS_ROOT, CONFIG_ROOT = initialize_data()

# Root directory holding per-variate feature files (local or HF-backed).
FEATURES_ROOT = get_features_root()

# One sub-directory per model under RESULTS_ROOT. Defaults to an empty list
# when the results root is missing so the rest of the app can still start.
ALL_MODELS = []
if RESULTS_ROOT.exists():
    ALL_MODELS = [p.name for p in RESULTS_ROOT.iterdir() if p.is_dir()]
print(f"๐ Found {len(ALL_MODELS)} models: {ALL_MODELS}")
# ---------------------------------------------------
# Dataset choices from TIME results, with human-friendly display names and
# mappings in both directions (display name <-> dataset_id).
DATASET_CHOICES, DATASET_DISPLAY_TO_ID, DATASET_ID_TO_DISPLAY = get_dataset_choices(str(RESULTS_ROOT))
print(f"๐ Found {len(DATASET_CHOICES)} dataset configurations")

# === Load dataset-level results once at startup ===
DATASETS_DF = get_all_datasets_results(root_dir=str(RESULTS_ROOT))
if not DATASETS_DF.empty:
    # Use dataset_id (dataset/freq) for ranking to correctly handle
    # multi-freq datasets. Each row is one independent experiment:
    # num_model x num_dataset_id x num_horizons.
    DATASETS_DF = compute_ranks(DATASETS_DF, groupby_cols=['dataset_id', "horizon"])
    print(f"โ Loaded {len(DATASETS_DF)} dataset results")
# === Load variate-level results for the pattern-based leaderboard ===
print("๐ Loading variate-level results...")
VARIATES_DF = get_all_variates_results(root_dir=str(RESULTS_ROOT))
if not VARIATES_DF.empty:
    # Rank models per (dataset_id, series_name, variate_name, horizon) so
    # each variate/horizon combination is ranked independently.
    VARIATES_DF = compute_ranks(VARIATES_DF, groupby_cols=['dataset_id', 'series_name', 'variate_name', 'horizon'])
    print(f"โ Loaded {len(VARIATES_DF)} variate-level results")
else:
    print("โ ๏ธ No variate-level results found")
# === Load per-variate tsfeatures used for pattern-based filtering ===
print("๐ Loading features...")
FEATURES_DF = load_all_features(features_root=str(FEATURES_ROOT), split="test")
if not FEATURES_DF.empty:
    print(f"โ Loaded {len(FEATURES_DF)} variate features")
else:
    print("โ ๏ธ No features found")
# Feature columns exempt from median binarization: identifiers, raw summary
# statistics, periodicity metadata, the missing-data ratio, and meta features
# that are already 0/1 (those are handled separately).
BINARIZE_EXCLUDE = (
    # Identifier columns
    ['dataset_id', 'series_name', 'variate_name', 'unique_id']
    # Raw summary statistics
    + ['mean', 'std', 'length']
    # Detected periods and their strengths
    + [f'period{i}' for i in range(1, 4)]
    + [f'p_strength{i}' for i in range(1, 4)]
    + ['missing_rate']
    # Meta features are already 0/1, handle separately
    + ['is_random_walk', 'has_spike_presence']
)
# Binarize numeric features by per-feature median so variates can be split
# into high/low groups. Falls back to an empty frame when no features loaded.
FEATURES_BOOL_DF = pd.DataFrame()
if not FEATURES_DF.empty:
    FEATURES_BOOL_DF = binarize_features(FEATURES_DF, exclude=BINARIZE_EXCLUDE)
    print(f"โ Binarized features for {len(FEATURES_BOOL_DF)} variates")

# Rank columns are only present when dataset results were actually loaded.
if not DATASETS_DF.empty:
    OVERALL_TABLE_COLUMNS = ["model", "MASE", "CRPS", "MASE_rank", "CRPS_rank"]
else:
    OVERALL_TABLE_COLUMNS = ["model", "MASE", "CRPS"]
# Forecast horizon buckets used throughout the UI.
ALL_HORIZONS = ["short", "medium", "long"]

# Map each UI pattern name onto the tsfeature column that backs it.
PATTERN_MAP = dict(
    # Trend patterns
    T_strength="trend_strength",
    T_linearity="linearity",
    T_curvature="curvature",
    # Seasonal patterns
    S_strength="seasonal_strength",
    S_complexity="seasonal_entropy",
    S_corr="seasonal_corr",
    # Residual patterns
    R_diff1_ACF1="e_diff1_acf1",
    R_ACF1="e_acf1",
    # Meta patterns
    stationarity="is_random_walk",  # Note: stationarity = NOT is_random_walk
    outlier_presence="has_spike_presence",
    complexity="x_entropy",  # High entropy = low predictability/high noise
)
# ---------------------------------------------------
# Leaderboard page title (rendered as raw HTML by the UI).
TITLE = """<h1 align="center" id="space-title"> It's TIME</h1>"""
# What does your leaderboard evaluate? (Markdown, rendered by the UI.)
INTRODUCTION_TEXT = """
TIME introduces a unified benchmark for time series probabilistic forecasting that supports evaluation at **multiple granularities**, ranging from overall performance across datasets to dataset-level, variate-level, and even individual test windows (with visualization). Beyond conventional analysis, the benchmark enables **pattern-driven, cross-dataset benchmarking** by grouping variates with similar temporal features, where patterns are defined based on groups of tsfeatures that capture properties such as trend, seasonality, and stationarity, offering a more systematic understanding of model behavior. For data and results, please refer to ๐ค [dataset](https://huggingface.co/datasets/TIME-benchmark/TIME-1.0/tree/main).
"""
# An integrated archive further enriches the platform by providing structural tsfeatures and statistical descriptors of all variates,
# ensuring both comprehensive evaluation and transparent interpretability across diverse forecasting scenarios
print("โ TIME Leaderboard initialization complete!")