File size: 5,215 Bytes
c104b43
 
 
bc3caba
 
 
 
 
c104b43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
{
  "meta": {
    "subject_model": "Qwen/Qwen2.5-3B-Instruct",
    "dataset": "ForecastBench — forecastingresearch/forecastbench-datasets (1,646 resolved binary forecast questions, Jul 2024 – Dec 2025)",
    "dataset_short": "ForecastBench",
    "dataset_license": "CC BY-SA 4.0",
    "dataset_url": "https://huggingface.co/datasets/forecastingresearch/forecastbench-datasets",
    "dataset_paper": "Karger et al., ForecastBench, ICLR 2025 — arXiv:2409.19839",
    "K": 10,
    "train_frac": 0.8,
    "n_total": 1646,
    "n_train": 1316,
    "n_holdout": 330,
    "license": "MIT",
    "author": "Serghei Brinza",
    "project": "Second Loop — Part 3 of 3",
    "repo": "https://github.com/SergheiBrinza/thin-channel"
  },
  "schedules": [
    {
      "name": "day",
      "period_days": 1,
      "holdout_brier_final": 0.20496566880877573,
      "holdout_brier_base_only": 0.21031801518975246,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1316.0
    },
    {
      "name": "3 days",
      "period_days": 3,
      "holdout_brier_final": 0.20496684828342504,
      "holdout_brier_base_only": 0.21031753069025927,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1315.0
    },
    {
      "name": "week",
      "period_days": 7,
      "holdout_brier_final": 0.2106551955314477,
      "holdout_brier_base_only": 0.21799144508408289,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1313.0
    },
    {
      "name": "2 weeks",
      "period_days": 14,
      "holdout_brier_final": 0.20966647823070747,
      "holdout_brier_base_only": 0.2173445920925949,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1313.0
    },
    {
      "name": "month",
      "period_days": 30,
      "holdout_brier_final": 0.2101098449226084,
      "holdout_brier_base_only": 0.2178103272838319,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1277.0
    },
    {
      "name": "2 months",
      "period_days": 60,
      "holdout_brier_final": 0.21982581362923156,
      "holdout_brier_base_only": 0.23042800795763183,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1173.0
    },
    {
      "name": "quarter",
      "period_days": 90,
      "holdout_brier_final": 0.2202173738348602,
      "holdout_brier_base_only": 0.23124628600106487,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 1022.0
    },
    {
      "name": "half year",
      "period_days": 180,
      "holdout_brier_final": 0.20868845602323596,
      "holdout_brier_base_only": 0.2196620786086268,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 417.0
    },
    {
      "name": "270 days",
      "period_days": 270,
      "holdout_brier_final": 0.21648459384888208,
      "holdout_brier_base_only": 0.23152855914970502,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 220.0
    },
    {
      "name": "year",
      "period_days": 365,
      "holdout_brier_final": 0.2195012177350905,
      "holdout_brier_base_only": 0.21615697496600672,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 417.0
    },
    {
      "name": "never",
      "period_days": null,
      "holdout_brier_final": 0.2979618181818181,
      "holdout_brier_base_only": 0.25,
      "holdout_brier_raw": 0.2979618181818181,
      "total_revealed": 0.0
    }
  ],
  "ablations": {
    "n_holdout": 330,
    "true_YES_rate": 0.276,
    "brier_a_raw": 0.298,
    "brier_b_base_only": 0.189,
    "brier_c_mix": 0.1889,
    "delta_c_vs_a": -0.1091,
    "delta_c_vs_b": -0.0001
  },
  "wide_baseline_overall": {
    "brier": 0.354,
    "mean_P": 0.486,
    "true_YES_rate": 0.34,
    "reference_brier_alwaysNO": 0.34,
    "reference_brier_baserate": 0.2244
  },
  "per_topic_beta": {
    "AI & Tech": {
      "alpha": 9.0,
      "beta": 39.0,
      "base": 0.1875,
      "n": 46.0,
      "lam": 0.8214285714285714
    },
    "Climate & Weather": {
      "alpha": 20.0,
      "beta": 45.0,
      "base": 0.3076923076923077,
      "n": 63.0,
      "lam": 0.863013698630137
    },
    "Entertainment & Culture": {
      "alpha": 2.0,
      "beta": 8.0,
      "base": 0.2,
      "n": 8.0,
      "lam": 0.4444444444444444
    },
    "Geopolitics & Conflict": {
      "alpha": 15.0,
      "beta": 84.0,
      "base": 0.15151515151515152,
      "n": 97.0,
      "lam": 0.9065420560747663
    },
    "Macro & Financial Indicators": {
      "alpha": 54.0,
      "beta": 125.0,
      "base": 0.3016759776536313,
      "n": 177.0,
      "lam": 0.946524064171123
    },
    "Markets & Crypto": {
      "alpha": 243.0,
      "beta": 183.0,
      "base": 0.5704225352112676,
      "n": 424.0,
      "lam": 0.9769585253456221
    },
    "Politics & Elections": {
      "alpha": 22.0,
      "beta": 66.0,
      "base": 0.25,
      "n": 86.0,
      "lam": 0.8958333333333334
    },
    "Public Health": {
      "alpha": 2.0,
      "beta": 169.0,
      "base": 0.011695906432748537,
      "n": 169.0,
      "lam": 0.9441340782122905
    },
    "Sports & Games": {
      "alpha": 63.0,
      "beta": 185.0,
      "base": 0.2540322580645161,
      "n": 246.0,
      "lam": 0.9609375
    }
  }
}