# NOTE(review): the "Spaces: / Running / Running" lines here were HuggingFace
# Spaces page-header residue from a web scrape, not part of the source module.
# --- Imports: grouped stdlib / third-party / local (scrape pipe-artifacts removed) ---
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

import pandas as pd

# Import HuggingFace Hub configuration
from src.hf_config import get_results_root, get_config_root, get_features_root, initialize_data
from src.utils import (
    get_all_datasets_results, get_all_domains_and_freq, get_all_variates_results,
    get_dataset_choices, get_dataset_display_map, compute_ranks,
    load_features, load_all_features, binarize_features
)
# =============================================================================
# Initialize data from HuggingFace Hub (or local for development)
# =============================================================================
# NOTE(review): emoji in the log messages in this file were garbled by an
# encoding round-trip (UTF-8 bytes shown as Thai code page); reconstructed
# from the surviving byte remnants — confirm against the original repo.
print("🚀 Starting TIME Leaderboard initialization...")

# Download/cache results and config from HuggingFace Hub
RESULTS_ROOT, CONFIG_ROOT = initialize_data()

# Get features root (local or HF)
FEATURES_ROOT = get_features_root()

# Get list of all models from the results directory: each immediate
# sub-directory of RESULTS_ROOT is treated as one model's results.
ALL_MODELS = []
if RESULTS_ROOT.exists():
    ALL_MODELS = [p.name for p in RESULTS_ROOT.iterdir() if p.is_dir()]
print(f"📊 Found {len(ALL_MODELS)} models: {ALL_MODELS}")

# ---------------------------------------------------
# Get dataset choices from TIME results (with smart display names)
DATASET_CHOICES, DATASET_DISPLAY_TO_ID, DATASET_ID_TO_DISPLAY = get_dataset_choices(str(RESULTS_ROOT))
print(f"📊 Found {len(DATASET_CHOICES)} dataset configurations")
# === Load data once at startup ===
DATASETS_DF = get_all_datasets_results(root_dir=str(RESULTS_ROOT))
if not DATASETS_DF.empty:
    # Use dataset_id (dataset/freq) for ranking to correctly handle multi-freq
    # datasets. Each row is one independent experiment:
    # num_model x num_dataset_id x num_horizons
    DATASETS_DF = compute_ranks(DATASETS_DF, groupby_cols=['dataset_id', "horizon"])
    print(f"✅ Loaded {len(DATASETS_DF)} dataset results")

# === Load variate-level results for pattern-based leaderboard ===
print("🔄 Loading variate-level results...")
VARIATES_DF = get_all_variates_results(root_dir=str(RESULTS_ROOT))
if not VARIATES_DF.empty:
    # Compute ranks per (dataset_id, series_name, variate_name, horizon)
    VARIATES_DF = compute_ranks(VARIATES_DF, groupby_cols=['dataset_id', 'series_name', 'variate_name', 'horizon'])
    print(f"✅ Loaded {len(VARIATES_DF)} variate-level results")
else:
    print("⚠️ No variate-level results found")

# === Load features for pattern-based filtering ===
print("🔄 Loading features...")
FEATURES_DF = load_all_features(features_root=str(FEATURES_ROOT), split="test")
if not FEATURES_DF.empty:
    print(f"✅ Loaded {len(FEATURES_DF)} variate features")
else:
    print("⚠️ No features found")
# Columns to exclude from binarization: identifier columns, raw statistics,
# period/strength diagnostics, and meta features that are already 0/1.
BINARIZE_EXCLUDE = [
    'dataset_id', 'series_name', 'variate_name', 'unique_id',
    'mean', 'std', 'length',
    'period1', 'period2', 'period3',
    'p_strength1', 'p_strength2', 'p_strength3',
    'missing_rate',
    # Meta features are already 0/1, handle separately
    'is_random_walk', 'has_spike_presence',
]

# Binarize numeric features by median
FEATURES_BOOL_DF = pd.DataFrame()
if not FEATURES_DF.empty:
    FEATURES_BOOL_DF = binarize_features(FEATURES_DF, exclude=BINARIZE_EXCLUDE)
    print(f"✅ Binarized features for {len(FEATURES_BOOL_DF)} variates")
# Columns of the overall leaderboard table; the rank columns are only
# meaningful when dataset-level results were actually loaded above.
OVERALL_TABLE_COLUMNS = ["model", "MASE", "CRPS"]
if not DATASETS_DF.empty:
    OVERALL_TABLE_COLUMNS = OVERALL_TABLE_COLUMNS + ["MASE_rank", "CRPS_rank"]

ALL_HORIZONS = ['short', 'medium', 'long']

# Pattern mapping: UI pattern name -> feature column name
PATTERN_MAP = dict(
    # Trend patterns
    T_strength="trend_strength",
    T_linearity="linearity",
    T_curvature="curvature",
    # Seasonal patterns
    S_strength="seasonal_strength",
    S_complexity="seasonal_entropy",
    S_corr="seasonal_corr",
    # Residual patterns
    R_diff1_ACF1="e_diff1_acf1",
    R_ACF1="e_acf1",
    # Meta patterns
    stationarity="is_random_walk",  # Note: stationarity = NOT is_random_walk
    outlier_presence="has_spike_presence",
    complexity="x_entropy",  # High entropy = low predictability/high noise
)
# ---------------------------------------------------
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title"> It's TIME</h1>"""

# What does your leaderboard evaluate?
# NOTE(review): the 🤗 emoji was reconstructed from mojibake byte remnants.
INTRODUCTION_TEXT = """
TIME introduces a unified benchmark for time series probabilistic forecasting that supports evaluation at **multiple granularities**, ranging from overall performance across datasets to dataset-level, variate-level, and even individual test windows (with visualization). Beyond conventional analysis, the benchmark enables **pattern-driven, cross-dataset benchmarking** by grouping variates with similar temporal features, where patterns are defined based on groups of tsfeatures that capture properties such as trend, seasonality, and stationarity, offering a more systematic understanding of model behavior. For data and results, please refer to 🤗 [dataset](https://huggingface.co/datasets/TIME-benchmark/TIME-1.0/tree/main).
"""
# An integrated archive further enriches the platform by providing structural tsfeatures and statistical descriptors of all variates,
# ensuring both comprehensive evaluation and transparent interpretability across diverse forecasting scenarios
print("✅ TIME Leaderboard initialization complete!")