Spaces:
Running
Running
| { | |
| "meta": { | |
| "subject_model": "Qwen/Qwen2.5-3B-Instruct", | |
| "dataset": "ForecastBench — forecastingresearch/forecastbench-datasets (1,646 resolved binary forecast questions, Jul 2024 – Dec 2025)", | |
| "dataset_short": "ForecastBench", | |
| "dataset_license": "CC BY-SA 4.0", | |
| "dataset_url": "https://huggingface.co/datasets/forecastingresearch/forecastbench-datasets", | |
| "dataset_paper": "Karger et al., ForecastBench, ICLR 2025 — arXiv:2409.19839", | |
| "K": 10, | |
| "train_frac": 0.8, | |
| "n_total": 1646, | |
| "n_train": 1316, | |
| "n_holdout": 330, | |
| "license": "MIT", | |
| "author": "Serghei Brinza", | |
| "project": "Second Loop — Part 3 of 3", | |
| "repo": "https://github.com/SergheiBrinza/thin-channel" | |
| }, | |
| "schedules": [ | |
| { | |
| "name": "day", | |
| "period_days": 1, | |
| "holdout_brier_final": 0.20496566880877573, | |
| "holdout_brier_base_only": 0.21031801518975246, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1316.0 | |
| }, | |
| { | |
| "name": "3 days", | |
| "period_days": 3, | |
| "holdout_brier_final": 0.20496684828342504, | |
| "holdout_brier_base_only": 0.21031753069025927, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1315.0 | |
| }, | |
| { | |
| "name": "week", | |
| "period_days": 7, | |
| "holdout_brier_final": 0.2106551955314477, | |
| "holdout_brier_base_only": 0.21799144508408289, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1313.0 | |
| }, | |
| { | |
| "name": "2 weeks", | |
| "period_days": 14, | |
| "holdout_brier_final": 0.20966647823070747, | |
| "holdout_brier_base_only": 0.2173445920925949, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1313.0 | |
| }, | |
| { | |
| "name": "month", | |
| "period_days": 30, | |
| "holdout_brier_final": 0.2101098449226084, | |
| "holdout_brier_base_only": 0.2178103272838319, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1277.0 | |
| }, | |
| { | |
| "name": "2 months", | |
| "period_days": 60, | |
| "holdout_brier_final": 0.21982581362923156, | |
| "holdout_brier_base_only": 0.23042800795763183, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1173.0 | |
| }, | |
| { | |
| "name": "quarter", | |
| "period_days": 90, | |
| "holdout_brier_final": 0.2202173738348602, | |
| "holdout_brier_base_only": 0.23124628600106487, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 1022.0 | |
| }, | |
| { | |
| "name": "half year", | |
| "period_days": 180, | |
| "holdout_brier_final": 0.20868845602323596, | |
| "holdout_brier_base_only": 0.2196620786086268, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 417.0 | |
| }, | |
| { | |
| "name": "270 days", | |
| "period_days": 270, | |
| "holdout_brier_final": 0.21648459384888208, | |
| "holdout_brier_base_only": 0.23152855914970502, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 220.0 | |
| }, | |
| { | |
| "name": "year", | |
| "period_days": 365, | |
| "holdout_brier_final": 0.2195012177350905, | |
| "holdout_brier_base_only": 0.21615697496600672, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 417.0 | |
| }, | |
| { | |
| "name": "never", | |
| "period_days": null, | |
| "holdout_brier_final": 0.2979618181818181, | |
| "holdout_brier_base_only": 0.25, | |
| "holdout_brier_raw": 0.2979618181818181, | |
| "total_revealed": 0.0 | |
| } | |
| ], | |
| "ablations": { | |
| "n_holdout": 330, | |
| "true_YES_rate": 0.276, | |
| "brier_a_raw": 0.298, | |
| "brier_b_base_only": 0.189, | |
| "brier_c_mix": 0.1889, | |
| "delta_c_vs_a": -0.1091, | |
| "delta_c_vs_b": -0.0001 | |
| }, | |
| "wide_baseline_overall": { | |
| "brier": 0.354, | |
| "mean_P": 0.486, | |
| "true_YES_rate": 0.34, | |
| "reference_brier_alwaysNO": 0.34, | |
| "reference_brier_baserate": 0.2244 | |
| }, | |
| "per_topic_beta": { | |
| "AI & Tech": { | |
| "alpha": 9.0, | |
| "beta": 39.0, | |
| "base": 0.1875, | |
| "n": 46.0, | |
| "lam": 0.8214285714285714 | |
| }, | |
| "Climate & Weather": { | |
| "alpha": 20.0, | |
| "beta": 45.0, | |
| "base": 0.3076923076923077, | |
| "n": 63.0, | |
| "lam": 0.863013698630137 | |
| }, | |
| "Entertainment & Culture": { | |
| "alpha": 2.0, | |
| "beta": 8.0, | |
| "base": 0.2, | |
| "n": 8.0, | |
| "lam": 0.4444444444444444 | |
| }, | |
| "Geopolitics & Conflict": { | |
| "alpha": 15.0, | |
| "beta": 84.0, | |
| "base": 0.15151515151515152, | |
| "n": 97.0, | |
| "lam": 0.9065420560747663 | |
| }, | |
| "Macro & Financial Indicators": { | |
| "alpha": 54.0, | |
| "beta": 125.0, | |
| "base": 0.3016759776536313, | |
| "n": 177.0, | |
| "lam": 0.946524064171123 | |
| }, | |
| "Markets & Crypto": { | |
| "alpha": 243.0, | |
| "beta": 183.0, | |
| "base": 0.5704225352112676, | |
| "n": 424.0, | |
| "lam": 0.9769585253456221 | |
| }, | |
| "Politics & Elections": { | |
| "alpha": 22.0, | |
| "beta": 66.0, | |
| "base": 0.25, | |
| "n": 86.0, | |
| "lam": 0.8958333333333334 | |
| }, | |
| "Public Health": { | |
| "alpha": 2.0, | |
| "beta": 169.0, | |
| "base": 0.011695906432748537, | |
| "n": 169.0, | |
| "lam": 0.9441340782122905 | |
| }, | |
| "Sports & Games": { | |
| "alpha": 63.0, | |
| "beta": 185.0, | |
| "base": 0.2540322580645161, | |
| "n": 246.0, | |
| "lam": 0.9609375 | |
| } | |
| } | |
| } |