MidAtBest committed on
Commit
9fc7bf0
·
1 Parent(s): b59506b

feat: update with real data points

Browse files
Files changed (3) hide show
  1. data/bed_dataset.csv +247 -13
  2. data/bigwig_dataset.csv +0 -0
  3. src/streamlit_app.py +115 -75
data/bed_dataset.csv CHANGED
@@ -1,13 +1,247 @@
1
- species,datasets,MCC
2
- Human,Intron,"[0.893,0.170,0.006,0.828,0.235,0.114,0.707,0.904,0.829,0.474]"
3
- Human,Exon,"[0.294,0.007,0.428,0.341,0.757,0.442,0.512,0.860,0.884,0.873]"
4
- Human,Splice_acceptor,"[0.057,0.279,0.129,0.844,0.272,0.174,0.082,0.603,0.277,0.448]"
5
- Human,Start_codon,"[0.719,0.304,0.482,0.019,0.302,0.942,0.924,0.982,0.982,0.161]"
6
- Cattle,Intron,"[0.853,0.969,0.585,0.609,0.127,0.842,0.814,0.147,0.472,0.258]"
7
- Cattle,Exon,"[0.163,0.118,0.152,0.525,0.179,0.967,0.574,0.897,0.593,0.454]"
8
- Cattle,Splice_acceptor,"[0.977,0.332,0.505,0.069,0.928,0.780,0.618,0.525,0.787,0.741]"
9
- Cattle,Start_codon,"[0.499,0.858,0.155,0.246,0.494,0.853,0.439,0.853,0.882,0.295]"
10
- Tomato,Intron,"[0.171,0.995,0.512,0.446,0.816,0.344,0.637,0.492,0.992,0.526]"
11
- Tomato,Exon,"[0.751,0.186,0.778,0.341,0.853,0.711,0.161,0.559,0.204,0.153]"
12
- Tomato,Splice_acceptor,"[0.073,0.688,0.568,0.669,0.910,0.581,0.168,0.687,0.928,0.821]"
13
- Tomato,Start_codon,"[0.491,0.743,0.172,0.351,0.675,0.845,0.077,0.593,0.552,0.089]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MCC,model_name,species,datasets
2
+ 0.334637850522995,NTv2 500M,cattle,intron
3
+ 0.1238768473267555,BPNet arch. 6M,cattle,intron
4
+ 0.383470207452774,Residual CNN 44M,cattle,intron
5
+ 0.3828243613243103,HyenaDNA 7M,cattle,intron
6
+ 0.4733810424804687,Caduceus 7M,cattle,intron
7
+ 0.4315277338027954,Evo2 1B,cattle,intron
8
+ 0.5455867648124695,NTv3 8M (pre),cattle,intron
9
+ 0.5453664064407349,NTv3 100M (pre),cattle,intron
10
+ 0.5628412365913391,NTv3 650M (pre),cattle,intron
11
+ 0.5682631134986877,NTv3 650M (post),cattle,intron
12
+ 0.3689357042312622,NTv2 500M,cattle,exon
13
+ 0.3250860869884491,BPNet arch. 6M,cattle,exon
14
+ 0.4674676060676574,Residual CNN 44M,cattle,exon
15
+ 0.2207767516374588,HyenaDNA 7M,cattle,exon
16
+ 0.4960922300815582,Caduceus 7M,cattle,exon
17
+ 0.4969632029533386,Evo2 1B,cattle,exon
18
+ 0.5432836413383484,NTv3 8M (pre),cattle,exon
19
+ 0.5531933307647705,NTv3 100M (pre),cattle,exon
20
+ 0.591151773929596,NTv3 650M (pre),cattle,exon
21
+ 0.6253225207328796,NTv3 650M (post),cattle,exon
22
+ 0.118808165192604,NTv2 500M,cattle,splice acceptor
23
+ 0.4715546369552612,BPNet arch. 6M,cattle,splice acceptor
24
+ 0.6620649099349976,Residual CNN 44M,cattle,splice acceptor
25
+ 0.104436807334423,HyenaDNA 7M,cattle,splice acceptor
26
+ 0.7064619660377502,Caduceus 7M,cattle,splice acceptor
27
+ 0.2085049450397491,Evo2 1B,cattle,splice acceptor
28
+ 0.7254849076271057,NTv3 8M (pre),cattle,splice acceptor
29
+ 0.7404072880744934,NTv3 100M (pre),cattle,splice acceptor
30
+ 0.7732946872711182,NTv3 650M (pre),cattle,splice acceptor
31
+ 0.7679624557495117,NTv3 650M (post),cattle,splice acceptor
32
+ 0.1412438601255417,NTv2 500M,cattle,start codon
33
+ 0.1490814685821533,BPNet arch. 6M,cattle,start codon
34
+ 0.3243320286273956,Residual CNN 44M,cattle,start codon
35
+ 0.056509330868721,HyenaDNA 7M,cattle,start codon
36
+ 0.3455557227134704,Caduceus 7M,cattle,start codon
37
+ 0.1030694246292114,Evo2 1B,cattle,start codon
38
+ 0.5275959968566895,NTv3 8M (pre),cattle,start codon
39
+ 0.4962065815925598,NTv3 100M (pre),cattle,start codon
40
+ 0.5591813921928406,NTv3 650M (pre),cattle,start codon
41
+ 0.5492052435874939,NTv3 650M (post),cattle,start codon
42
+ 0.5492052435874939,NTv2 500M,cattle,start codon
43
+ 0.1015273928642273,BPNet arch. 6M,cattle,intron
44
+ 0.3299930691719055,Residual CNN 44M,cattle,intron
45
+ 0.3826011121273041,HyenaDNA 7M,cattle,intron
46
+ 0.5564854741096497,Caduceus 7M,cattle,intron
47
+ 0.5564854741096497,NTv2 500M,cattle,intron
48
+ 0.323502242565155,BPNet arch. 6M,cattle,exon
49
+ 0.519285261631012,Residual CNN 44M,cattle,exon
50
+ 0.1038060635328292,HyenaDNA 7M,cattle,splice acceptor
51
+ 0.1038060635328292,Caduceus 7M,cattle,splice acceptor
52
+ 0.1038060635328292,NTv2 500M,cattle,splice acceptor
53
+ 0.4435675740242004,BPNet arch. 6M,cattle,splice acceptor
54
+ 0.6590774655342102,Residual CNN 44M,cattle,splice acceptor
55
+ 0.1038060635328292,HyenaDNA 7M,cattle,splice acceptor
56
+ 0.1038060635328292,Caduceus 7M,cattle,splice acceptor
57
+ 0.1038060635328292,NTv2 500M,cattle,splice acceptor
58
+ 0.0901669710874557,BPNet arch. 6M,cattle,start codon
59
+ 0.3548502624034881,Residual CNN 44M,cattle,start codon
60
+ 0.0545537285506725,HyenaDNA 7M,cattle,start codon
61
+ 0.0545537285506725,Caduceus 7M,cattle,start codon
62
+ 0.0639578104019165,BPNet arch. 6M,cattle,start codon
63
+ 0.3266464471817016,Residual CNN 44M,cattle,intron
64
+ 0.3266464471817016,HyenaDNA 7M,cattle,intron
65
+ 0.3266464471817016,Caduceus 7M,cattle,intron
66
+ 0.1383400112390518,BPNet arch. 6M,cattle,intron
67
+ 0.4856111407279968,Residual CNN 44M,cattle,exon
68
+ 0.4856111407279968,HyenaDNA 7M,cattle,exon
69
+ 0.4856111407279968,Caduceus 7M,cattle,exon
70
+ 0.4220209121704101,BPNet arch. 6M,cattle,splice acceptor
71
+ 0.689546525478363,Residual CNN 44M,cattle,splice acceptor
72
+ 0.689546525478363,HyenaDNA 7M,cattle,splice acceptor
73
+ 0.689546525478363,Caduceus 7M,cattle,splice acceptor
74
+ 0.0930091217160224,BPNet arch. 6M,cattle,start codon
75
+ 0.423166275024414,Residual CNN 44M,cattle,start codon
76
+ 0.423166275024414,HyenaDNA 7M,cattle,start codon
77
+ 0.423166275024414,Caduceus 7M,cattle,start codon
78
+ 0.4777896404266357,NTv2 500M,tomato,intron
79
+ 0.3216900527477264,BPNet arch. 6M,tomato,intron
80
+ 0.46840900182724,Residual CNN 44M,tomato,intron
81
+ 0.5251263380050659,PlantCAD2 88M,tomato,intron
82
+ 0.747674286365509,Evo2 1B,tomato,intron
83
+ 0.6858112812042236,NTv3 8M (pre),tomato,intron
84
+ 0.7038365006446838,NTv3 100M (pre),tomato,intron
85
+ 0.7481895685195923,NTv3 650M (pre),tomato,intron
86
+ 0.7458349466323853,NTv3 650M (post),tomato,intron
87
+ 0.6147475838661194,NTv2 500M,tomato,exon
88
+ 0.4551227986812591,BPNet arch. 6M,tomato,exon
89
+ 0.5068296194076538,Residual CNN 44M,tomato,exon
90
+ 0.7256030440330505,PlantCAD2 88M,tomato,exon
91
+ 0.7006198763847351,Evo2 1B,tomato,exon
92
+ 0.7537696361541748,NTv3 8M (pre),tomato,exon
93
+ 0.7484462857246399,NTv3 100M (pre),tomato,exon
94
+ 0.764011561870575,NTv3 650M (pre),tomato,exon
95
+ 0.7750575542449951,NTv3 650M (post),tomato,exon
96
+ 0.1691933125257492,NTv2 500M,tomato,splice acceptor
97
+ 0.125656172633171,BPNet arch. 6M,tomato,splice acceptor
98
+ 0.4359458982944488,Residual CNN 44M,tomato,splice acceptor
99
+ 0.744257926940918,PlantCAD2 88M,tomato,splice acceptor
100
+ 0.3791649639606476,Evo2 1B,tomato,splice acceptor
101
+ 0.6623862385749817,NTv3 8M (pre),tomato,splice acceptor
102
+ 0.6843105554580688,NTv3 100M (pre),tomato,splice acceptor
103
+ 0.7641868591308594,NTv3 650M (pre),tomato,splice acceptor
104
+ 0.7584431767463684,NTv3 650M (post),tomato,splice acceptor
105
+ 0.132934883236885,NTv2 500M,tomato,start codon
106
+ 0.0,BPNet arch. 6M,tomato,start codon
107
+ 0.088478960096836,Residual CNN 44M,tomato,start codon
108
+ 0.2019559442996978,PlantCAD2 88M,tomato,start codon
109
+ 0.1622217148542404,Evo2 1B,tomato,start codon
110
+ 0.2966536581516266,NTv3 8M (pre),tomato,start codon
111
+ 0.3968957066535949,NTv3 100M (pre),tomato,start codon
112
+ 0.4830105900764465,NTv3 650M (pre),tomato,start codon
113
+ 0.5007501244544983,NTv3 650M (post),tomato,start codon
114
+ 0.6770024299621582,NTv2 500M,tomato,intron
115
+ 0.2927957773208618,BPNet arch. 6M,tomato,intron
116
+ 0.1383400112390518,Residual CNN 44M,tomato,intron
117
+ 0.1383400112390518,PlantCAD2 88M,tomato,intron
118
+ 0.5751976370811462,NTv2 500M,tomato,exon
119
+ 0.3057552278041839,BPNet arch. 6M,tomato,exon
120
+ 0.168193981051445,NTv2 500M,tomato,splice acceptor
121
+ 0.0,BPNet arch. 6M,tomato,splice acceptor
122
+ 0.4833243191242218,Residual CNN 44M,tomato,splice acceptor
123
+ 0.4833243191242218,PlantCAD2 88M,tomato,splice acceptor
124
+ 0.1586925536394119,NTv2 500M,tomato,start codon
125
+ 0.0,BPNet arch. 6M,tomato,start codon
126
+ 0.1107296794652938,Residual CNN 44M,tomato,start codon
127
+ 0.1107296794652938,PlantCAD2 88M,tomato,start codon
128
+ 0.3502058088779449,BPNet arch. 6M,tomato,intron
129
+ 0.5514466166496277,Residual CNN 44M,tomato,intron
130
+ 0.5514466166496277,PlantCAD2 88M,tomato,intron
131
+ 0.3020758032798767,BPNet arch. 6M,tomato,exon
132
+ 0.4746756553649902,Residual CNN 44M,tomato,exon
133
+ 0.4746756553649902,PlantCAD2 88M,tomato,exon
134
+ 0.0,BPNet arch. 6M,tomato,splice acceptor
135
+ 0.3391502797603607,Residual CNN 44M,tomato,splice acceptor
136
+ 0.3391502797603607,PlantCAD2 88M,tomato,splice acceptor
137
+ 0.0639578104019165,BPNet arch. 6M,tomato,start codon
138
+ 0.0914037525653839,Residual CNN 44M,tomato,start codon
139
+ 0.0914037525653839,PlantCAD2 88M,tomato,start codon
140
+ 0.1995969861745834,NTv2 500M,human,intron
141
+ 0.0296161584556102,BPNet arch. 6M,human,intron
142
+ 0.2347834408283233,Residual CNN 44M,human,intron
143
+ 0.33451908826828,HyenaDNA 7M,human,intron
144
+ 0.4144788980484009,Caduceus 7M,human,intron
145
+ 0.0,Evo2 1B,human,intron
146
+ 0.4695742726325989,NTv3 8M (pre),human,intron
147
+ 0.475054919719696,NTv3 100M (pre),human,intron
148
+ 0.5504136681556702,NTv3 650M (pre),human,intron
149
+ 0.5643875002861023,NTv3 650M (post),human,intron
150
+ 0.1995969861745834,NTv2 500M,human,intron
151
+ 0.2706590592861175,BPNet arch. 6M,human,exon
152
+ 0.2678671479225158,Residual CNN 44M,human,exon
153
+ 0.179698497056961,HyenaDNA 7M,human,exon
154
+ 0.5098947286605835,Caduceus 7M,human,exon
155
+ 0.4510694444179535,Evo2 1B,human,exon
156
+ 0.6089931726455688,NTv3 8M (pre),human,exon
157
+ 0.6492856740951538,NTv3 100M (pre),human,exon
158
+ 0.6975767016410828,NTv3 650M (pre),human,exon
159
+ 0.6822624206542969,NTv3 650M (post),human,exon
160
+ 0.1493269056081771,NTv2 500M,human,splice acceptor
161
+ 0.3807527124881744,BPNet arch. 6M,human,splice acceptor
162
+ 0.6632664203643799,Residual CNN 44M,human,splice acceptor
163
+ 0.1002769619226455,HyenaDNA 7M,human,splice acceptor
164
+ 0.7357247471809387,Caduceus 7M,human,splice acceptor
165
+ 0.1821079105138778,Evo2 1B,human,splice acceptor
166
+ 0.7726271748542786,NTv3 8M (pre),human,splice acceptor
167
+ 0.77947598695755,NTv3 100M (pre),human,splice acceptor
168
+ 0.8028115034103394,NTv3 650M (pre),human,splice acceptor
169
+ 0.7979229092597961,NTv3 650M (post),human,splice acceptor
170
+ 0.139576569199562,NTv2 500M,human,start codon
171
+ 0.1334401220083236,BPNet arch. 6M,human,start codon
172
+ 0.3876807987689972,Residual CNN 44M,human,start codon
173
+ 0.1003016158938407,HyenaDNA 7M,human,start codon
174
+ 0.3958532512187958,Caduceus 7M,human,start codon
175
+ 0.1399599611759185,Evo2 1B,human,start codon
176
+ 0.540923535823822,NTv3 8M (pre),human,start codon
177
+ 0.5464004278182983,NTv3 100M (pre),human,start codon
178
+ 0.6803378462791443,NTv3 650M (pre),human,start codon
179
+ 0.7310947179794312,NTv3 650M (post),human,start codon
180
+ 0.7310947179794312,NTv2 500M,human,start codon
181
+ 0.0172978900372982,BPNet arch. 6M,human,intron
182
+ 0.2740728259086609,Residual CNN 44M,human,intron
183
+ 0.3312098085880279,HyenaDNA 7M,human,intron
184
+ 0.5108950138092041,Caduceus 7M,human,intron
185
+ 0.5034915208816528,NTv3 8M (pre),human,intron
186
+ 0.5154411792755127,NTv3 100M (pre),human,intron
187
+ 0.5814740061759949,NTv3 650M (pre),human,intron
188
+ 0.5920455455780029,NTv3 650M (post),human,intron
189
+ 0.5920455455780029,NTv2 500M,human,intron
190
+ 0.2252149283885955,BPNet arch. 6M,human,exon
191
+ 0.4010578095912933,Residual CNN 44M,human,exon
192
+ 0.1851459741592407,HyenaDNA 7M,human,exon
193
+ 0.4599409103393554,Caduceus 7M,human,exon
194
+ 0.5931490063667297,NTv3 8M (pre),human,exon
195
+ 0.6058318018913269,NTv3 100M (pre),human,exon
196
+ 0.6738048791885376,NTv3 650M (pre),human,exon
197
+ 0.6738048791885376,NTv3 650M (post),human,exon
198
+ 0.6738048791885376,NTv2 500M,human,exon
199
+ 0.3751010596752167,BPNet arch. 6M,human,splice acceptor
200
+ 0.681228756904602,Residual CNN 44M,human,splice acceptor
201
+ 0.0252278540283441,HyenaDNA 7M,human,splice acceptor
202
+ 0.7485092878341675,Caduceus 7M,human,splice acceptor
203
+ 0.7772909998893738,NTv3 8M (pre),human,splice acceptor
204
+ 0.794090747833252,NTv3 100M (pre),human,splice acceptor
205
+ 0.8239933252334595,NTv3 650M (pre),human,splice acceptor
206
+ 0.804115891456604,NTv3 650M (post),human,splice acceptor
207
+ 0.804115891456604,NTv2 500M,human,splice acceptor
208
+ 0.0,BPNet arch. 6M,human,start codon
209
+ 0.3292546272277832,Residual CNN 44M,human,start codon
210
+ 0.0647941380739212,HyenaDNA 7M,human,start codon
211
+ 0.4505241215229034,Caduceus 7M,human,start codon
212
+ 0.60422682762146,NTv3 8M (pre),human,start codon
213
+ 0.6015576124191284,NTv3 100M (pre),human,start codon
214
+ 0.6452956795692444,NTv3 650M (pre),human,start codon
215
+ 0.6761345267295837,NTv3 650M (post),human,start codon
216
+ 0.0185965970158576,BPNet arch. 6M,human,intron
217
+ 0.2623045742511749,Residual CNN 44M,human,intron
218
+ 0.2623045742511749,HyenaDNA 7M,human,intron
219
+ 0.2623045742511749,Caduceus 7M,human,intron
220
+ 0.4804849028587341,NTv3 8M (pre),human,intron
221
+ 0.482195496559143,NTv3 100M (pre),human,intron
222
+ 0.5425574779510498,NTv3 650M (pre),human,intron
223
+ 0.5443048477172852,NTv3 650M (post),human,intron
224
+ 0.2360571771860122,BPNet arch. 6M,human,exon
225
+ 0.2360571771860122,Residual CNN 44M,human,exon
226
+ 0.2360571771860122,HyenaDNA 7M,human,exon
227
+ 0.2360571771860122,Caduceus 7M,human,exon
228
+ 0.6339762210845947,NTv3 8M (pre),human,exon
229
+ 0.6433913111686707,NTv3 100M (pre),human,exon
230
+ 0.6518793702125549,NTv3 650M (pre),human,exon
231
+ 0.6812491416931152,NTv3 650M (post),human,exon
232
+ 0.3842235207557678,BPNet arch. 6M,human,splice acceptor
233
+ 0.6810190081596375,Residual CNN 44M,human,splice acceptor
234
+ 0.6810190081596375,HyenaDNA 7M,human,splice acceptor
235
+ 0.6810190081596375,Caduceus 7M,human,splice acceptor
236
+ 0.7796080708503723,NTv3 8M (pre),human,splice acceptor
237
+ 0.7596970200538635,NTv3 100M (pre),human,splice acceptor
238
+ 0.7915040850639343,NTv3 650M (pre),human,splice acceptor
239
+ 0.7957100868225098,NTv3 650M (post),human,splice acceptor
240
+ 0.1114460304379463,BPNet arch. 6M,human,start codon
241
+ 0.3342535495758056,Residual CNN 44M,human,start codon
242
+ 0.3342535495758056,HyenaDNA 7M,human,start codon
243
+ 0.3342535495758056,Caduceus 7M,human,start codon
244
+ 0.5167152881622314,NTv3 8M (pre),human,start codon
245
+ 0.5340564250946045,NTv3 100M (pre),human,start codon
246
+ 0.6148532032966614,NTv3 650M (pre),human,start codon
247
+ 0.6582212448120117,NTv3 650M (post),human,start codon
data/bigwig_dataset.csv CHANGED
The diff for this file is too large to render. See raw diff
 
src/streamlit_app.py CHANGED
@@ -1,5 +1,4 @@
1
  from typing import List
2
- import ast
3
  import os
4
 
5
  import pandas as pd
@@ -10,47 +9,73 @@ import plotly.express as px
10
  # Page config (must be the first Streamlit command)
11
  # ---------------------------------------------------------------------
12
  st.set_page_config(
13
- page_title="Custom Model Benchmarks",
14
  layout="wide",
15
  )
16
 
17
  # ---------------------------------------------------------------------
18
  # Configuration
19
  # ---------------------------------------------------------------------
20
-
21
- MODEL_NAMES = [
22
- "NTv2 500M MS",
23
- "BPNet 6M",
24
- "SpliceAI 44M",
25
- "PlantCAD2 - Small 88M",
26
- "Evo2 1b BF16",
27
- "NTv3 8M",
28
- "NTv3 100M",
29
- "NTv3 650M",
30
- "NTv3 650M - post-trained",
31
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  MODEL_COLORS = {
34
- "NTv2 500M MS": "#1f77b4",
35
- "BPNet 6M": "#ff7f0e",
36
- "SpliceAI 44M": "#2ca02c",
37
- "PlantCAD2 - Small 88M": "#d62728",
38
- "Evo2 1b BF16": "#9467bd",
39
- "NTv3 8M": "#8c564b",
40
- "NTv3 100M": "#e377c2",
41
- "NTv3 650M": "#7f7f7f",
42
- "NTv3 650M - post-trained": "#bcbd22",
 
43
  }
44
 
 
 
45
  _LAST_UPDATED = "Dec 10, 2025"
46
  _INTRO = """
47
- Simple leaderboard over custom benchmarks.
48
 
49
  - **Pearson correlations (multi-assay)**: per-dataset scores across species and models.
50
  - **MCC (bed tracks)**: per-track MCC values across species and models.
51
 
52
- Each metric cell in the CSVs is a list of scores (one per model).
53
- We expand this to (Model × Species × Dataset) and aggregate according to your filters.
54
  """
55
 
56
  HERE = os.path.dirname(os.path.abspath(__file__)) # /app/src
@@ -59,6 +84,7 @@ DATA_DIR = os.path.join(PROJECT_ROOT, "data")
59
 
60
  PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
61
  MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
 
62
  # ---------------------------------------------------------------------
63
  # Data loading & preprocessing
64
  # ---------------------------------------------------------------------
@@ -72,57 +98,71 @@ def load_raw_data():
72
  pearson_df.columns = [c.strip() for c in pearson_df.columns]
73
  mcc_df.columns = [c.strip() for c in mcc_df.columns]
74
 
75
- # Optional: basic sanity check on required columns
76
- # required_p = {"species", "datasets", "pearson correlation"}
77
- # required_m = {"species", "datasets", "MCC"}
78
- # missing_p = required_p - set(pearson_df.columns)
79
- # missing_m = required_m - set(mcc_df.columns)
80
- # if missing_p:
81
- # st.error(f"Pearson CSV missing columns: {missing_p}")
82
- # if missing_m:
83
- # st.error(f"MCC CSV missing columns: {missing_m}")
84
-
85
  return pearson_df, mcc_df
86
 
87
 
88
- def expand_metric_lists(df: pd.DataFrame, metric_col: str) -> pd.DataFrame:
 
89
  """
90
- Take a DataFrame where `metric_col` is a stringified list, and expand it
91
- into rows per Model, with scalar 'Score' and 'Model' columns.
 
 
 
 
 
 
 
 
 
 
92
  """
93
- rows = []
94
- for _, row in df.iterrows():
95
- raw = row[metric_col]
96
- try:
97
- values = ast.literal_eval(str(raw))
98
- except Exception:
99
- # Skip rows that don't parse correctly
100
- continue
101
-
102
- if not isinstance(values, (list, tuple)):
103
- continue
104
-
105
- n_models = min(len(MODEL_NAMES), len(values))
106
- for i in range(n_models):
107
- new_row = {
108
- "species": row["species"],
109
- "datasets": row["datasets"],
110
- "Model": MODEL_NAMES[i],
111
- "Score": float(values[i]),
112
- }
113
- if "assay_type" in row.index:
114
- new_row["assay_type"] = row["assay_type"]
115
- rows.append(new_row)
116
-
117
- return pd.DataFrame(rows)
118
 
 
 
 
119
 
120
- @st.cache_data
121
- def load_expanded_data():
122
- raw_pearson, raw_mcc = load_raw_data()
123
- pearson_expanded = expand_metric_lists(raw_pearson, "pearson correlation")
124
- mcc_expanded = expand_metric_lists(raw_mcc, "MCC")
125
- return pearson_expanded, mcc_expanded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
 
128
  _PEARSON_DF, _MCC_DF = load_expanded_data()
@@ -259,7 +299,7 @@ def sidebar_toggle(label: str, value: bool = False, key: str | None = None) -> b
259
 
260
 
261
  def main():
262
- st.title("🧬 Custom Model Benchmarks")
263
  st.markdown(_INTRO)
264
  st.markdown(f"_Last updated: **{_LAST_UPDATED}**_")
265
 
@@ -286,7 +326,7 @@ def main():
286
 
287
  # Assay toggles (Pearson only), based on filtered species
288
  if cfg.get("has_assay_type", False):
289
- st.sidebar.subheader("Assay types (Pearson only)")
290
  if selected_species:
291
  df_for_assays = df_bench[df_bench["species"].isin(selected_species)]
292
  else:
@@ -305,8 +345,8 @@ def main():
305
 
306
  # Bed track / dataset toggles (MCC only), based on species selection
307
  selected_datasets: List[str] = []
308
- if benchmark_name == "MCC (bed tracks)":
309
- st.sidebar.subheader("Bed tracks (datasets)")
310
  if selected_species:
311
  df_for_tracks = df_bench[df_bench["species"].isin(selected_species)]
312
  else:
@@ -318,7 +358,7 @@ def main():
318
  else:
319
  selected_datasets = []
320
 
321
- # Model toggles (we keep all models, regardless of benchmark; filters will prune)
322
  st.sidebar.subheader("Models")
323
  selected_models: List[str] = []
324
  for model in _ALL_MODELS:
 
1
  from typing import List
 
2
  import os
3
 
4
  import pandas as pd
 
9
  # Page config (must be the first Streamlit command)
10
  # ---------------------------------------------------------------------
11
  st.set_page_config(
12
+ page_title="NTv3 Benchmark",
13
  layout="wide",
14
  )
15
 
16
  # ---------------------------------------------------------------------
17
  # Configuration
18
  # ---------------------------------------------------------------------
19
+ COLORS = {
20
+ # Primary colors 1 (our models)
21
+ 'blue_0': '#004697', # Darkest allowable blue
22
+ 'blue_1': '#3973fc', # Main blue
23
+ 'blue_2': '#7ea4fc', # Medium blue
24
+ 'blue_3': '#c3d5fc', # Light blue (lightest allowable blue)
25
+ # Secondary colors 1
26
+ 'red_1': '#ff554d', # Medium red
27
+ 'red_2': '#ffe0de', # Light red
28
+ # Primary colors 2
29
+ 'green_1': '#00b050', # Darkest green
30
+ 'green_2': '#92d050', # Medium green
31
+ 'green_3': '#c6e0b4', # Light green (lightest allowable green)
32
+ # Secondary colors 2
33
+ 'gold_1': '#fdb932',
34
+ # Tertiary colors
35
+ 'orange_1': '#ff975e',
36
+ 'purple_1': '#9a6ce4',
37
+ 'purple_2': '#bb9aef', # Medium purple
38
+ 'purple_3': '#ceb5f5', # Light purple (lightest allowable purple)
39
+ # Grays (other models)
40
+ 'gray_1': '#808080', # Darkest gray (use as a last resort)
41
+ 'gray_2': '#b3b3b3', # Medium gray (start with this as the darkest when possible)
42
+ 'gray_3': '#e6e6e6', # Lightest gray
43
+ 'gray_4': '#ffffff', # It's actually just white (use as a last resort)
44
+ # If all other options are exhausted
45
+ 'cyan_1': '#0096b4', # Darkest teal
46
+ 'cyan_2': '#28bed2', # Medium cyan
47
+ 'cyan_3': '#8cdceb', # Lightest cyan
48
+ 'magenta_1': '#b428a0', # Darkest magenta
49
+ 'magenta_2': '#dc50be', # Medium pink
50
+ 'magenta_3': '#f5a0dc', # Lightest pink
51
+ 'yellow_1': '#c8aa00', # Darkest yellow
52
+ 'yellow_2': '#ffd200', # Medium yellow
53
+ 'yellow_3': '#fff08c', # Lightest yellow
54
+ }
55
 
56
  MODEL_COLORS = {
57
+ "NTv3 650M (post)": COLORS['blue_0'],
58
+ 'NTv3 650M (pre)': COLORS['blue_1'], # #3973fc (Main blue)
59
+ 'NTv3 100M (pre)': COLORS['blue_2'], # #7ea4fc (Medium blue)
60
+ 'NTv3 8M (pre)': COLORS['blue_3'], # #c3d5fc (Light blue)
61
+ 'Evo2 1B': COLORS['green_3'], # #c6e0b4 (Light green)
62
+ "NTv2 500M": COLORS['gray_1'],
63
+ "BPNet arch. 6M": COLORS['cyan_1'],
64
+ "Residual CNN 44M": COLORS['magenta_1'],
65
+ "PlantCAD2 88M": COLORS["purple_1"],
66
+ "Caduceus 7M": COLORS["purple_2"]
67
  }
68
 
69
+ MODEL_NAMES = list(MODEL_COLORS.keys())
70
+
71
  _LAST_UPDATED = "Dec 10, 2025"
72
  _INTRO = """
73
+ Benchmark across gene annotation and functional tracks.
74
 
75
  - **Pearson correlations (multi-assay)**: per-dataset scores across species and models.
76
  - **MCC (bed tracks)**: per-track MCC values across species and models.
77
 
78
+ These tasks measure the model's ability to generalize to unseen tracks, species and assay types.
 
79
  """
80
 
81
  HERE = os.path.dirname(os.path.abspath(__file__)) # /app/src
 
84
 
85
  PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
86
  MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
87
+
88
  # ---------------------------------------------------------------------
89
  # Data loading & preprocessing
90
  # ---------------------------------------------------------------------
 
98
  pearson_df.columns = [c.strip() for c in pearson_df.columns]
99
  mcc_df.columns = [c.strip() for c in mcc_df.columns]
100
 
 
 
 
 
 
 
 
 
 
 
101
  return pearson_df, mcc_df
102
 
103
 
104
+ @st.cache_data
105
+ def load_expanded_data():
106
  """
107
+ Load data in the new format where each row is already:
108
+ (species, [assay_type], datasets, model_name, metric)
109
+ and convert into a unified schema:
110
+ species, assay_type?, datasets, Model, Score
111
+
112
+ For Pearson:
113
+ If multiple rows share (species, assay_type, datasets, Model),
114
+ we average their Score.
115
+
116
+ For MCC:
117
+ If multiple rows share (species, datasets, Model),
118
+ we average their Score.
119
  """
120
+ pearson_df, mcc_df = load_raw_data()
121
+
122
+ # --- Pearson correlations ---
123
+ # Expect columns: species, assay_type, datasets, model_name, pearson correlation
124
+ pearson_df = pearson_df.rename(
125
+ columns={
126
+ "model_name": "Model",
127
+ "pearson correlation": "Score",
128
+ }
129
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
+ pearson_group_cols = ["species", "datasets", "Model"]
132
+ if "assay_type" in pearson_df.columns:
133
+ pearson_group_cols.append("assay_type")
134
 
135
+ pearson_df = (
136
+ pearson_df
137
+ .groupby(pearson_group_cols, as_index=False, dropna=False)["Score"]
138
+ .mean()
139
+ )
140
+
141
+ # --- MCC (bed tracks) ---
142
+ # Expect columns: species, datasets, model_name, MCC
143
+ mcc_df = mcc_df.rename(
144
+ columns={
145
+ "model_name": "Model",
146
+ "MCC": "Score",
147
+ }
148
+ )
149
+
150
+ # Collapse duplicates with same (species, datasets, Model)
151
+ mcc_group_cols = ["species", "datasets", "Model"]
152
+ mcc_df = (
153
+ mcc_df
154
+ .groupby(mcc_group_cols, as_index=False, dropna=False)["Score"]
155
+ .mean()
156
+ )
157
+
158
+ # Optional sanity checks
159
+ for df_name, df in [("pearson", pearson_df), ("mcc", mcc_df)]:
160
+ required = {"species", "datasets", "Model", "Score"}
161
+ missing = required - set(df.columns)
162
+ if missing:
163
+ st.error(f"{df_name} dataframe missing columns: {missing}")
164
+
165
+ return pearson_df, mcc_df
166
 
167
 
168
  _PEARSON_DF, _MCC_DF = load_expanded_data()
 
299
 
300
 
301
  def main():
302
+ st.title("🧬 NTv3 Benchmark")
303
  st.markdown(_INTRO)
304
  st.markdown(f"_Last updated: **{_LAST_UPDATED}**_")
305
 
 
326
 
327
  # Assay toggles (Pearson only), based on filtered species
328
  if cfg.get("has_assay_type", False):
329
+ st.sidebar.subheader("Assay types")
330
  if selected_species:
331
  df_for_assays = df_bench[df_bench["species"].isin(selected_species)]
332
  else:
 
345
 
346
  # Bed track / dataset toggles (MCC only), based on species selection
347
  selected_datasets: List[str] = []
348
+ if benchmark_name == "MCC":
349
+ st.sidebar.subheader("Genome annotations")
350
  if selected_species:
351
  df_for_tracks = df_bench[df_bench["species"].isin(selected_species)]
352
  else:
 
358
  else:
359
  selected_datasets = []
360
 
361
+ # Model toggles (we keep all models in MODEL_NAMES; filters + data will prune)
362
  st.sidebar.subheader("Models")
363
  selected_models: List[str] = []
364
  for model in _ALL_MODELS: