thin-channel / data.json
Laborator's picture
Rebuild as interactive static demo: live reveal-schedule lever drives the Brier curve; correct dataset label to ForecastBench (CC BY-SA 4.0)
bc3caba verified
Raw
History Blame Contribute Delete
5.22 kB
{
"meta": {
"subject_model": "Qwen/Qwen2.5-3B-Instruct",
"dataset": "ForecastBench — forecastingresearch/forecastbench-datasets (1,646 resolved binary forecast questions, Jul 2024 – Dec 2025)",
"dataset_short": "ForecastBench",
"dataset_license": "CC BY-SA 4.0",
"dataset_url": "https://huggingface.co/datasets/forecastingresearch/forecastbench-datasets",
"dataset_paper": "Karger et al., ForecastBench, ICLR 2025 — arXiv:2409.19839",
"K": 10,
"train_frac": 0.8,
"n_total": 1646,
"n_train": 1316,
"n_holdout": 330,
"license": "MIT",
"author": "Serghei Brinza",
"project": "Second Loop — Part 3 of 3",
"repo": "https://github.com/SergheiBrinza/thin-channel"
},
"schedules": [
{
"name": "day",
"period_days": 1,
"holdout_brier_final": 0.20496566880877573,
"holdout_brier_base_only": 0.21031801518975246,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1316.0
},
{
"name": "3 days",
"period_days": 3,
"holdout_brier_final": 0.20496684828342504,
"holdout_brier_base_only": 0.21031753069025927,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1315.0
},
{
"name": "week",
"period_days": 7,
"holdout_brier_final": 0.2106551955314477,
"holdout_brier_base_only": 0.21799144508408289,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1313.0
},
{
"name": "2 weeks",
"period_days": 14,
"holdout_brier_final": 0.20966647823070747,
"holdout_brier_base_only": 0.2173445920925949,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1313.0
},
{
"name": "month",
"period_days": 30,
"holdout_brier_final": 0.2101098449226084,
"holdout_brier_base_only": 0.2178103272838319,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1277.0
},
{
"name": "2 months",
"period_days": 60,
"holdout_brier_final": 0.21982581362923156,
"holdout_brier_base_only": 0.23042800795763183,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1173.0
},
{
"name": "quarter",
"period_days": 90,
"holdout_brier_final": 0.2202173738348602,
"holdout_brier_base_only": 0.23124628600106487,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 1022.0
},
{
"name": "half year",
"period_days": 180,
"holdout_brier_final": 0.20868845602323596,
"holdout_brier_base_only": 0.2196620786086268,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 417.0
},
{
"name": "270 days",
"period_days": 270,
"holdout_brier_final": 0.21648459384888208,
"holdout_brier_base_only": 0.23152855914970502,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 220.0
},
{
"name": "year",
"period_days": 365,
"holdout_brier_final": 0.2195012177350905,
"holdout_brier_base_only": 0.21615697496600672,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 417.0
},
{
"name": "never",
"period_days": null,
"holdout_brier_final": 0.2979618181818181,
"holdout_brier_base_only": 0.25,
"holdout_brier_raw": 0.2979618181818181,
"total_revealed": 0.0
}
],
"ablations": {
"n_holdout": 330,
"true_YES_rate": 0.276,
"brier_a_raw": 0.298,
"brier_b_base_only": 0.189,
"brier_c_mix": 0.1889,
"delta_c_vs_a": -0.1091,
"delta_c_vs_b": -0.0001
},
"wide_baseline_overall": {
"brier": 0.354,
"mean_P": 0.486,
"true_YES_rate": 0.34,
"reference_brier_alwaysNO": 0.34,
"reference_brier_baserate": 0.2244
},
"per_topic_beta": {
"AI & Tech": {
"alpha": 9.0,
"beta": 39.0,
"base": 0.1875,
"n": 46.0,
"lam": 0.8214285714285714
},
"Climate & Weather": {
"alpha": 20.0,
"beta": 45.0,
"base": 0.3076923076923077,
"n": 63.0,
"lam": 0.863013698630137
},
"Entertainment & Culture": {
"alpha": 2.0,
"beta": 8.0,
"base": 0.2,
"n": 8.0,
"lam": 0.4444444444444444
},
"Geopolitics & Conflict": {
"alpha": 15.0,
"beta": 84.0,
"base": 0.15151515151515152,
"n": 97.0,
"lam": 0.9065420560747663
},
"Macro & Financial Indicators": {
"alpha": 54.0,
"beta": 125.0,
"base": 0.3016759776536313,
"n": 177.0,
"lam": 0.946524064171123
},
"Markets & Crypto": {
"alpha": 243.0,
"beta": 183.0,
"base": 0.5704225352112676,
"n": 424.0,
"lam": 0.9769585253456221
},
"Politics & Elections": {
"alpha": 22.0,
"beta": 66.0,
"base": 0.25,
"n": 86.0,
"lam": 0.8958333333333334
},
"Public Health": {
"alpha": 2.0,
"beta": 169.0,
"base": 0.011695906432748537,
"n": 169.0,
"lam": 0.9441340782122905
},
"Sports & Games": {
"alpha": 63.0,
"beta": 185.0,
"base": 0.2540322580645161,
"n": 246.0,
"lam": 0.9609375
}
}
}