MidAtBest commited on
Commit
4c80381
·
1 Parent(s): d960a86

feat: use bernardo's dataset and fix scatterplot

Browse files
data/bed_dataset.csv DELETED
@@ -1,334 +0,0 @@
1
- MCC,model_name,running_time_hours,species,datasets
2
- 0.334637850522995,NTv2 500M,88.0,cattle,intron
3
- 0.1238768473267555,BPNet arch. 6M,4.0,cattle,intron
4
- 0.383470207452774,Residual CNN 44M,19.0,cattle,intron
5
- 0.3828243613243103,HyenaDNA 7M,23.0,cattle,intron
6
- 0.4733810424804687,Caduceus 7M,32.0,cattle,intron
7
- 0.4315277338027954,Evo2 1B,43.0,cattle,intron
8
- 0.5455867648124695,NTv3 8M (pre),1.0,cattle,intron
9
- 0.5453664064407349,NTv3 100M (pre),2.0,cattle,intron
10
- 0.5628412365913391,NTv3 650M (pre),5.0,cattle,intron
11
- 0.5682631134986877,NTv3 650M (pos),7.0,cattle,intron
12
- 0.3689357042312622,NTv2 500M,87.0,cattle,exon
13
- 0.3250860869884491,BPNet arch. 6M,4.0,cattle,exon
14
- 0.4674676060676574,Residual CNN 44M,19.0,cattle,exon
15
- 0.2207767516374588,HyenaDNA 7M,21.0,cattle,exon
16
- 0.4960922300815582,Caduceus 7M,32.0,cattle,exon
17
- 0.4969632029533386,Evo2 1B,44.0,cattle,exon
18
- 0.5432836413383484,NTv3 8M (pre),1.0,cattle,exon
19
- 0.5531933307647705,NTv3 100M (pre),2.0,cattle,exon
20
- 0.591151773929596,NTv3 650M (pre),5.0,cattle,exon
21
- 0.6253225207328796,NTv3 650M (pos),7.0,cattle,exon
22
- 0.118808165192604,NTv2 500M,86.0,cattle,splice acceptor
23
- 0.4715546369552612,BPNet arch. 6M,4.0,cattle,splice acceptor
24
- 0.6620649099349976,Residual CNN 44M,19.0,cattle,splice acceptor
25
- 0.104436807334423,HyenaDNA 7M,22.0,cattle,splice acceptor
26
- 0.7064619660377502,Caduceus 7M,30.0,cattle,splice acceptor
27
- 0.2085049450397491,Evo2 1B,43.0,cattle,splice acceptor
28
- 0.7254849076271057,NTv3 8M (pre),1.0,cattle,splice acceptor
29
- 0.7404072880744934,NTv3 100M (pre),2.0,cattle,splice acceptor
30
- 0.7732946872711182,NTv3 650M (pre),5.0,cattle,splice acceptor
31
- 0.7679624557495117,NTv3 650M (pos),7.0,cattle,splice acceptor
32
- 0.1412438601255417,NTv2 500M,88.0,cattle,start codon
33
- 0.1490814685821533,BPNet arch. 6M,4.0,cattle,start codon
34
- 0.3243320286273956,Residual CNN 44M,19.0,cattle,start codon
35
- 0.056509330868721,HyenaDNA 7M,23.0,cattle,start codon
36
- 0.3455557227134704,Caduceus 7M,33.0,cattle,start codon
37
- 0.1030694246292114,Evo2 1B,43.0,cattle,start codon
38
- 0.5275959968566895,NTv3 8M (pre),1.0,cattle,start codon
39
- 0.4962065815925598,NTv3 100M (pre),2.0,cattle,start codon
40
- 0.5591813921928406,NTv3 650M (pre),5.0,cattle,start codon
41
- 0.5492052435874939,NTv3 650M (pos),7.0,cattle,start codon
42
- 0.0383123345673084,NTv2 500M,90.0,cattle,intron
43
- 0.1015273928642273,BPNet arch. 6M,7.0,cattle,intron
44
- 0.3299930691719055,Residual CNN 44M,23.0,cattle,intron
45
- 0.3826011121273041,HyenaDNA 7M,20.0,cattle,intron
46
- 0.5564854741096497,Caduceus 7M,36.0,cattle,intron
47
- 0.5645747780799866,NTv3 8M (pre),2.0,cattle,intron
48
- 0.5765650272369385,NTv3 100M (pre),2.0,cattle,intron
49
- 0.6140890121459961,NTv3 650M (pre),7.0,cattle,intron
50
- 0.6709504723548889,NTv3 650M (pos),10.0,cattle,intron
51
- 0.3665516376495361,NTv2 500M,88.0,cattle,exon
52
- 0.323502242565155,BPNet arch. 6M,7.0,cattle,exon
53
- 0.519285261631012,Residual CNN 44M,23.0,cattle,exon
54
- 0.2514283955097198,HyenaDNA 7M,23.0,cattle,exon
55
- 0.5072187781333923,Caduceus 7M,39.0,cattle,exon
56
- 0.593974232673645,NTv3 8M (pre),1.0,cattle,exon
57
- 0.6014777421951294,NTv3 100M (pre),2.0,cattle,exon
58
- 0.6433462500572205,NTv3 650M (pre),7.0,cattle,exon
59
- 0.6648420095443726,NTv3 650M (pos),9.0,cattle,exon
60
- 0.0937248468399047,NTv2 500M,89.0,cattle,splice acceptor
61
- 0.4435675740242004,BPNet arch. 6M,7.0,cattle,splice acceptor
62
- 0.6590774655342102,Residual CNN 44M,23.0,cattle,splice acceptor
63
- 0.1038060635328292,HyenaDNA 7M,21.0,cattle,splice acceptor
64
- 0.6937510371208191,Caduceus 7M,38.0,cattle,splice acceptor
65
- 0.7248824238777161,NTv3 8M (pre),1.0,cattle,splice acceptor
66
- 0.7345820069313049,NTv3 100M (pre),2.0,cattle,splice acceptor
67
- 0.7439091801643372,NTv3 650M (pre),7.0,cattle,splice acceptor
68
- 0.758992075920105,NTv3 650M (pos),9.0,cattle,splice acceptor
69
- 0.1127461418509483,NTv2 500M,88.0,cattle,start codon
70
- 0.0901669710874557,BPNet arch. 6M,7.0,cattle,start codon
71
- 0.3548502624034881,Residual CNN 44M,23.0,cattle,start codon
72
- 0.0545537285506725,HyenaDNA 7M,24.0,cattle,start codon
73
- 0.4038819670677185,Caduceus 7M,38.0,cattle,start codon
74
- 0.5045616030693054,NTv3 8M (pre),1.0,cattle,start codon
75
- 0.4762806594371795,NTv3 100M (pre),3.0,cattle,start codon
76
- 0.5610686540603638,NTv3 650M (pre),7.0,cattle,start codon
77
- 0.5782408118247986,NTv3 650M (pos),9.0,cattle,start codon
78
- 0.1547228246927261,NTv2 500M,85.0,cattle,intron
79
- 0.1383400112390518,BPNet arch. 6M,6.0,cattle,intron
80
- 0.3266464471817016,Residual CNN 44M,23.0,cattle,intron
81
- 0.4240079522132873,HyenaDNA 7M,23.0,cattle,intron
82
- 0.4552704095840454,Caduceus 7M,37.0,cattle,intron
83
- 0.5063548684120178,NTv3 8M (pre),1.0,cattle,intron
84
- 0.5619235038757324,NTv3 100M (pre),3.0,cattle,intron
85
- 0.531277596950531,NTv3 650M (pre),7.0,cattle,intron
86
- 0.6205132603645325,NTv3 650M (pos),9.0,cattle,intron
87
- 0.3413117229938507,NTv2 500M,87.0,cattle,exon
88
- 0.2900931537151336,BPNet arch. 6M,7.0,cattle,exon
89
- 0.4856111407279968,Residual CNN 44M,23.0,cattle,exon
90
- 0.2246854901313781,HyenaDNA 7M,70.0,cattle,exon
91
- 0.5370016098022461,Caduceus 7M,35.0,cattle,exon
92
- 0.5721412897109985,NTv3 8M (pre),2.0,cattle,exon
93
- 0.5819903612136841,NTv3 100M (pre),2.0,cattle,exon
94
- 0.6183731555938721,NTv3 650M (pre),7.0,cattle,exon
95
- 0.6233119964599609,NTv3 650M (pos),9.0,cattle,exon
96
- 0.1367750316858291,NTv2 500M,89.0,cattle,splice acceptor
97
- 0.4220209121704101,BPNet arch. 6M,7.0,cattle,splice acceptor
98
- 0.689546525478363,Residual CNN 44M,23.0,cattle,splice acceptor
99
- 0.1121769621968269,HyenaDNA 7M,69.0,cattle,splice acceptor
100
- 0.7314619421958923,Caduceus 7M,37.0,cattle,splice acceptor
101
- 0.74350905418396,NTv3 8M (pre),2.0,cattle,splice acceptor
102
- 0.746654748916626,NTv3 100M (pre),2.0,cattle,splice acceptor
103
- 0.7714020609855652,NTv3 650M (pre),7.0,cattle,splice acceptor
104
- 0.7809271812438965,NTv3 650M (pos),9.0,cattle,splice acceptor
105
- 0.0901266038417816,NTv2 500M,89.0,cattle,start codon
106
- 0.0930091217160224,BPNet arch. 6M,6.0,cattle,start codon
107
- 0.423166275024414,Residual CNN 44M,23.0,cattle,start codon
108
- 0.1253955662250518,HyenaDNA 7M,72.0,cattle,start codon
109
- 0.33419930934906,Caduceus 7M,37.0,cattle,start codon
110
- 0.4639334082603454,NTv3 8M (pre),1.0,cattle,start codon
111
- 0.5102551579475403,NTv3 100M (pre),2.0,cattle,start codon
112
- 0.5866840481758118,NTv3 650M (pre),7.0,cattle,start codon
113
- 0.588148832321167,NTv3 650M (pos),9.0,cattle,start codon
114
- 0.4777896404266357,NTv2 500M,33.0,tomato,intron
115
- 0.3216900527477264,BPNet arch. 6M,1.0,tomato,intron
116
- 0.46840900182724,Residual CNN 44M,6.0,tomato,intron
117
- 0.5251263380050659,PlantCAD2 88M,38.0,tomato,intron
118
- 0.747674286365509,Evo2 1B,13.0,tomato,intron
119
- 0.6858112812042236,NTv3 8M (pre),0.0,tomato,intron
120
- 0.7038365006446838,NTv3 100M (pre),0.0,tomato,intron
121
- 0.7481895685195923,NTv3 650M (pre),1.0,tomato,intron
122
- 0.7458349466323853,NTv3 650M (pos),2.0,tomato,intron
123
- 0.6147475838661194,NTv2 500M,33.0,tomato,exon
124
- 0.4551227986812591,BPNet arch. 6M,1.0,tomato,exon
125
- 0.5068296194076538,Residual CNN 44M,6.0,tomato,exon
126
- 0.7256030440330505,PlantCAD2 88M,37.0,tomato,exon
127
- 0.7006198763847351,Evo2 1B,14.0,tomato,exon
128
- 0.7537696361541748,NTv3 8M (pre),0.0,tomato,exon
129
- 0.7484462857246399,NTv3 100M (pre),0.0,tomato,exon
130
- 0.764011561870575,NTv3 650M (pre),1.0,tomato,exon
131
- 0.7750575542449951,NTv3 650M (pos),2.0,tomato,exon
132
- 0.1691933125257492,NTv2 500M,33.0,tomato,splice acceptor
133
- 0.125656172633171,BPNet arch. 6M,1.0,tomato,splice acceptor
134
- 0.4359458982944488,Residual CNN 44M,6.0,tomato,splice acceptor
135
- 0.744257926940918,PlantCAD2 88M,38.0,tomato,splice acceptor
136
- 0.3791649639606476,Evo2 1B,13.0,tomato,splice acceptor
137
- 0.6623862385749817,NTv3 8M (pre),0.0,tomato,splice acceptor
138
- 0.6843105554580688,NTv3 100M (pre),0.0,tomato,splice acceptor
139
- 0.7641868591308594,NTv3 650M (pre),1.0,tomato,splice acceptor
140
- 0.7584431767463684,NTv3 650M (pos),2.0,tomato,splice acceptor
141
- 0.132934883236885,NTv2 500M,34.0,tomato,start codon
142
- 0.0,BPNet arch. 6M,1.0,tomato,start codon
143
- 0.088478960096836,Residual CNN 44M,6.0,tomato,start codon
144
- 0.2019559442996978,PlantCAD2 88M,38.0,tomato,start codon
145
- 0.1622217148542404,Evo2 1B,13.0,tomato,start codon
146
- 0.2966536581516266,NTv3 8M (pre),0.0,tomato,start codon
147
- 0.3968957066535949,NTv3 100M (pre),0.0,tomato,start codon
148
- 0.4830105900764465,NTv3 650M (pre),1.0,tomato,start codon
149
- 0.5007501244544983,NTv3 650M (pos),2.0,tomato,start codon
150
- 0.6770024299621582,NTv2 500M,33.0,tomato,intron
151
- 0.2927957773208618,BPNet arch. 6M,2.0,tomato,intron
152
- 0.557494580745697,Residual CNN 44M,8.0,tomato,intron
153
- 0.7252154350280762,PlantCAD2 88M,46.0,tomato,intron
154
- 0.712181031703949,NTv3 8M (pre),1.0,tomato,intron
155
- 0.7515084147453308,NTv3 100M (pre),1.0,tomato,intron
156
- 0.7400797009468079,NTv3 650M (pre),3.0,tomato,intron
157
- 0.7532288432121277,NTv3 650M (pos),4.0,tomato,intron
158
- 0.5751976370811462,NTv2 500M,33.0,tomato,exon
159
- 0.3057552278041839,BPNet arch. 6M,3.0,tomato,exon
160
- 0.5581462979316711,Residual CNN 44M,8.0,tomato,exon
161
- 0.7699167728424072,PlantCAD2 88M,50.0,tomato,exon
162
- 0.748009443283081,NTv3 8M (pre),1.0,tomato,exon
163
- 0.7629056572914124,NTv3 100M (pre),1.0,tomato,exon
164
- 0.7755228877067566,NTv3 650M (pre),3.0,tomato,exon
165
- 0.782516598701477,NTv3 650M (pos),4.0,tomato,exon
166
- 0.168193981051445,NTv2 500M,33.0,tomato,splice acceptor
167
- 0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
168
- 0.4833243191242218,Residual CNN 44M,8.0,tomato,splice acceptor
169
- 0.7335307598114014,PlantCAD2 88M,46.0,tomato,splice acceptor
170
- 0.6908777952194214,NTv3 8M (pre),1.0,tomato,splice acceptor
171
- 0.7348777055740356,NTv3 100M (pre),1.0,tomato,splice acceptor
172
- 0.7484620809555054,NTv3 650M (pre),3.0,tomato,splice acceptor
173
- 0.7539154291152954,NTv3 650M (pos),4.0,tomato,splice acceptor
174
- 0.1586925536394119,NTv2 500M,33.0,tomato,start codon
175
- 0.0,BPNet arch. 6M,2.0,tomato,start codon
176
- 0.1107296794652938,Residual CNN 44M,8.0,tomato,start codon
177
- 0.3756755590438843,PlantCAD2 88M,48.0,tomato,start codon
178
- 0.4113904237747192,NTv3 8M (pre),1.0,tomato,start codon
179
- 0.4541433155536651,NTv3 100M (pre),1.0,tomato,start codon
180
- 0.5002310872077942,NTv3 650M (pre),3.0,tomato,start codon
181
- 0.5470007658004761,NTv3 650M (pos),4.0,tomato,start codon
182
- 0.6712294220924377,NTv2 500M,33.0,tomato,intron
183
- 0.3502058088779449,BPNet arch. 6M,2.0,tomato,intron
184
- 0.5514466166496277,Residual CNN 44M,8.0,tomato,intron
185
- 0.722817599773407,PlantCAD2 88M,88.0,tomato,intron
186
- 0.7013162970542908,NTv3 8M (pre),1.0,tomato,intron
187
- 0.747364342212677,NTv3 100M (pre),1.0,tomato,intron
188
- 0.752423107624054,NTv3 650M (pre),3.0,tomato,intron
189
- 0.7750566005706787,NTv3 650M (pos),4.0,tomato,intron
190
- 0.6022632718086243,NTv2 500M,33.0,tomato,exon
191
- 0.3020758032798767,BPNet arch. 6M,2.0,tomato,exon
192
- 0.4746756553649902,Residual CNN 44M,8.0,tomato,exon
193
- 0.7354215979576111,PlantCAD2 88M,45.0,tomato,exon
194
- 0.7157281041145325,NTv3 8M (pre),1.0,tomato,exon
195
- 0.7326820492744446,NTv3 100M (pre),1.0,tomato,exon
196
- 0.7308483123779297,NTv3 650M (pre),3.0,tomato,exon
197
- 0.7417197823524475,NTv3 650M (pos),4.0,tomato,exon
198
- 0.1558358669281005,NTv2 500M,33.0,tomato,splice acceptor
199
- 0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
200
- 0.3391502797603607,Residual CNN 44M,8.0,tomato,splice acceptor
201
- 0.7305923700332642,PlantCAD2 88M,85.0,tomato,splice acceptor
202
- 0.6977006196975708,NTv3 8M (pre),1.0,tomato,splice acceptor
203
- 0.6770275831222534,NTv3 100M (pre),1.0,tomato,splice acceptor
204
- 0.6770390272140503,NTv3 650M (pre),3.0,tomato,splice acceptor
205
- 0.7287323474884033,NTv3 650M (pos),4.0,tomato,splice acceptor
206
- 0.1887903958559036,NTv2 500M,33.0,tomato,start codon
207
- 0.0639578104019165,BPNet arch. 6M,2.0,tomato,start codon
208
- 0.0914037525653839,Residual CNN 44M,8.0,tomato,start codon
209
- 0.4881043434143066,PlantCAD2 88M,88.0,tomato,start codon
210
- 0.4309621453285217,NTv3 8M (pre),1.0,tomato,start codon
211
- 0.4028272926807403,NTv3 100M (pre),1.0,tomato,start codon
212
- 0.4060510396957397,NTv3 650M (pre),3.0,tomato,start codon
213
- 0.472331553697586,NTv3 650M (pos),4.0,tomato,start codon
214
- 0.1995969861745834,NTv2 500M,72.0,human,intron
215
- 0.0296161584556102,BPNet arch. 6M,3.0,human,intron
216
- 0.2347834408283233,Residual CNN 44M,15.0,human,intron
217
- 0.33451908826828,HyenaDNA 7M,17.0,human,intron
218
- 0.4144788980484009,Caduceus 7M,27.0,human,intron
219
- 0.0,Evo2 1B,34.0,human,intron
220
- 0.4695742726325989,NTv3 8M (pre),1.0,human,intron
221
- 0.475054919719696,NTv3 100M (pre),2.0,human,intron
222
- 0.5504136681556702,NTv3 650M (pre),5.0,human,intron
223
- 0.5643875002861023,NTv3 650M (pos),6.0,human,intron
224
- 0.0546500161290168,NTv2 500M,72.0,human,exon
225
- 0.2706590592861175,BPNet arch. 6M,3.0,human,exon
226
- 0.2678671479225158,Residual CNN 44M,15.0,human,exon
227
- 0.179698497056961,HyenaDNA 7M,19.0,human,exon
228
- 0.5098947286605835,Caduceus 7M,26.0,human,exon
229
- 0.4510694444179535,Evo2 1B,34.0,human,exon
230
- 0.6089931726455688,NTv3 8M (pre),1.0,human,exon
231
- 0.6492856740951538,NTv3 100M (pre),2.0,human,exon
232
- 0.6975767016410828,NTv3 650M (pre),5.0,human,exon
233
- 0.6822624206542969,NTv3 650M (pos),8.0,human,exon
234
- 0.1493269056081771,NTv2 500M,73.0,human,splice acceptor
235
- 0.3807527124881744,BPNet arch. 6M,3.0,human,splice acceptor
236
- 0.6632664203643799,Residual CNN 44M,15.0,human,splice acceptor
237
- 0.1002769619226455,HyenaDNA 7M,17.0,human,splice acceptor
238
- 0.7357247471809387,Caduceus 7M,24.0,human,splice acceptor
239
- 0.1821079105138778,Evo2 1B,34.0,human,splice acceptor
240
- 0.7726271748542786,NTv3 8M (pre),1.0,human,splice acceptor
241
- 0.77947598695755,NTv3 100M (pre),2.0,human,splice acceptor
242
- 0.8028115034103394,NTv3 650M (pre),5.0,human,splice acceptor
243
- 0.7979229092597961,NTv3 650M (pos),7.0,human,splice acceptor
244
- 0.139576569199562,NTv2 500M,73.0,human,start codon
245
- 0.1334401220083236,BPNet arch. 6M,3.0,human,start codon
246
- 0.3876807987689972,Residual CNN 44M,15.0,human,start codon
247
- 0.1003016158938407,HyenaDNA 7M,18.0,human,start codon
248
- 0.3958532512187958,Caduceus 7M,24.0,human,start codon
249
- 0.1399599611759185,Evo2 1B,34.0,human,start codon
250
- 0.540923535823822,NTv3 8M (pre),1.0,human,start codon
251
- 0.5464004278182983,NTv3 100M (pre),2.0,human,start codon
252
- 0.6803378462791443,NTv3 650M (pre),5.0,human,start codon
253
- 0.7310947179794312,NTv3 650M (pos),7.0,human,start codon
254
- 0.0814515128731727,NTv2 500M,72.0,human,intron
255
- 0.0172978900372982,BPNet arch. 6M,5.0,human,intron
256
- 0.2740728259086609,Residual CNN 44M,19.0,human,intron
257
- 0.3312098085880279,HyenaDNA 7M,21.0,human,intron
258
- 0.5108950138092041,Caduceus 7M,33.0,human,intron
259
- 0.5034915208816528,NTv3 8M (pre),1.0,human,intron
260
- 0.5154411792755127,NTv3 100M (pre),2.0,human,intron
261
- 0.5814740061759949,NTv3 650M (pre),6.0,human,intron
262
- 0.5920455455780029,NTv3 650M (pos),8.0,human,intron
263
- 0.3505669236183166,NTv2 500M,72.0,human,exon
264
- 0.2252149283885955,BPNet arch. 6M,5.0,human,exon
265
- 0.4010578095912933,Residual CNN 44M,18.0,human,exon
266
- 0.1851459741592407,HyenaDNA 7M,18.0,human,exon
267
- 0.4599409103393554,Caduceus 7M,33.0,human,exon
268
- 0.5931490063667297,NTv3 8M (pre),1.0,human,exon
269
- 0.6058318018913269,NTv3 100M (pre),2.0,human,exon
270
- 0.6738048791885376,NTv3 650M (pre),6.0,human,exon
271
- 0.6936564445495605,NTv3 650M (pos),8.0,human,exon
272
- 0.1533636748790741,NTv2 500M,72.0,human,splice acceptor
273
- 0.3751010596752167,BPNet arch. 6M,5.0,human,splice acceptor
274
- 0.681228756904602,Residual CNN 44M,19.0,human,splice acceptor
275
- 0.0252278540283441,HyenaDNA 7M,22.0,human,splice acceptor
276
- 0.7485092878341675,Caduceus 7M,35.0,human,splice acceptor
277
- 0.7772909998893738,NTv3 8M (pre),1.0,human,splice acceptor
278
- 0.794090747833252,NTv3 100M (pre),2.0,human,splice acceptor
279
- 0.8239933252334595,NTv3 650M (pre),6.0,human,splice acceptor
280
- 0.804115891456604,NTv3 650M (pos),8.0,human,splice acceptor
281
- 0.0851806029677391,NTv2 500M,72.0,human,start codon
282
- 0.0,BPNet arch. 6M,5.0,human,start codon
283
- 0.3292546272277832,Residual CNN 44M,19.0,human,start codon
284
- 0.0647941380739212,HyenaDNA 7M,20.0,human,start codon
285
- 0.4505241215229034,Caduceus 7M,33.0,human,start codon
286
- 0.60422682762146,NTv3 8M (pre),1.0,human,start codon
287
- 0.6015576124191284,NTv3 100M (pre),2.0,human,start codon
288
- 0.6452956795692444,NTv3 650M (pre),6.0,human,start codon
289
- 0.6761345267295837,NTv3 650M (pos),8.0,human,start codon
290
- 0.0558800511062145,NTv2 500M,70.0,human,intron
291
- 0.0185965970158576,BPNet arch. 6M,5.0,human,intron
292
- 0.2623045742511749,Residual CNN 44M,18.0,human,intron
293
- 0.3633092641830444,HyenaDNA 7M,68.0,human,intron
294
- 0.4261827170848846,Caduceus 7M,29.0,human,intron
295
- 0.4804849028587341,NTv3 8M (pre),1.0,human,intron
296
- 0.482195496559143,NTv3 100M (pre),2.0,human,intron
297
- 0.5425574779510498,NTv3 650M (pre),6.0,human,intron
298
- 0.5443048477172852,NTv3 650M (pos),8.0,human,intron
299
- 0.3958893716335296,NTv2 500M,71.0,human,exon
300
- 0.2360571771860122,BPNet arch. 6M,5.0,human,exon
301
- 0.3744256496429443,Residual CNN 44M,18.0,human,exon
302
- 0.1936572045087814,HyenaDNA 7M,68.0,human,exon
303
- 0.5046994090080261,Caduceus 7M,29.0,human,exon
304
- 0.6339762210845947,NTv3 8M (pre),1.0,human,exon
305
- 0.6433913111686707,NTv3 100M (pre),2.0,human,exon
306
- 0.6518793702125549,NTv3 650M (pre),6.0,human,exon
307
- 0.6812491416931152,NTv3 650M (pos),8.0,human,exon
308
- 0.1248077526688575,NTv2 500M,70.0,human,splice acceptor
309
- 0.3842235207557678,BPNet arch. 6M,5.0,human,splice acceptor
310
- 0.6810190081596375,Residual CNN 44M,18.0,human,splice acceptor
311
- 0.0527583621442317,HyenaDNA 7M,17.0,human,splice acceptor
312
- 0.7072214484214783,Caduceus 7M,29.0,human,splice acceptor
313
- 0.7796080708503723,NTv3 8M (pre),1.0,human,splice acceptor
314
- 0.7596970200538635,NTv3 100M (pre),2.0,human,splice acceptor
315
- 0.7915040850639343,NTv3 650M (pre),6.0,human,splice acceptor
316
- 0.7957100868225098,NTv3 650M (pos),8.0,human,splice acceptor
317
- 0.1267423331737518,NTv2 500M,70.0,human,start codon
318
- 0.1114460304379463,BPNet arch. 6M,5.0,human,start codon
319
- 0.3342535495758056,Residual CNN 44M,18.0,human,start codon
320
- 0.1215013489127159,HyenaDNA 7M,18.0,human,start codon
321
- 0.4082835018634796,Caduceus 7M,29.0,human,start codon
322
- 0.5167152881622314,NTv3 8M (pre),1.0,human,start codon
323
- 0.5340564250946045,NTv3 100M (pre),2.0,human,start codon
324
- 0.6148532032966614,NTv3 650M (pre),6.0,human,start codon
325
- 0.6582212448120117,NTv3 650M (pos),8.0,human,start codon
326
- 0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
327
- 0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
328
- 0.6582212448120117,Caduceus 7M,8.0,human,start codon
329
- 0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
330
- 0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
331
- 0.6582212448120117,Caduceus 7M,8.0,human,start codon
332
- 0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
333
- 0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
334
- 0.6582212448120117,Caduceus 7M,8.0,human,start codon
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/bigwig_dataset.csv DELETED
The diff for this file is too large to render. See raw diff
 
data/ntv3_benchmark_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
src/streamlit_app.py CHANGED
@@ -82,6 +82,7 @@ ASSAY_COLORS = {
82
  "splice acceptor": '#ff9900',
83
  "start codon": '#9933cc',
84
  }
 
85
 
86
  MODEL_COLORS = {
87
  "NTv3 650M (pos)": COLORS['blue_0'],
@@ -93,7 +94,8 @@ MODEL_COLORS = {
93
  "BPNet arch. 6M": COLORS['cyan_1'],
94
  "Residual CNN 44M": COLORS['magenta_1'],
95
  "PlantCAD2 88M": COLORS["purple_1"],
96
- "Caduceus 7M": COLORS["purple_2"]
 
97
  }
98
 
99
  MODEL_TRAINING_STATUS = {
@@ -107,6 +109,7 @@ MODEL_TRAINING_STATUS = {
107
  "NTv2 500M": "PRE",
108
  "BPNet arch. 6M": "SCRATCH",
109
  "PlantCAD2 88M": "PRE",
 
110
  }
111
 
112
  MODEL_GPU_MULTIPLIER = {
@@ -152,8 +155,7 @@ HERE = os.path.dirname(os.path.abspath(__file__)) # /app/src
152
  PROJECT_ROOT = os.path.dirname(HERE) # /app
153
  DATA_DIR = os.path.join(PROJECT_ROOT, "data")
154
 
155
- PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
156
- MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
157
 
158
  # ---------------------------------------------------------------------
159
  # Data loading & preprocessing
@@ -162,122 +164,108 @@ MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
162
 
163
  @st.cache_data
164
  def load_raw_data():
165
- pearson_df = pd.read_csv(PEARSON_PATH)
166
- mcc_df = pd.read_csv(MCC_PATH)
 
167
 
168
- pearson_df.columns = [c.strip() for c in pearson_df.columns]
169
- mcc_df.columns = [c.strip() for c in mcc_df.columns]
170
 
171
- return pearson_df, mcc_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
- def _normalize_training_hours(df: pd.DataFrame) -> pd.DataFrame:
174
- return df.rename(columns={"running_time_hours": "GPU hours"})
175
 
176
 
177
- @st.cache_data
178
- def load_expanded_data():
179
  """
180
- Load data in the new format where each row is already:
181
- (species, [assay_type], datasets, model_name, metric)
182
- and convert into a unified schema:
183
- species, assay_type?, datasets, Model, Score
184
-
185
- For Pearson:
186
- If multiple rows share (species, assay_type, datasets, Model),
187
- we average their Score.
188
-
189
- For MCC:
190
- If multiple rows share (species, datasets, Model),
191
- we average their Score.
192
  """
193
- pearson_df, mcc_df = load_raw_data()
194
- pearson_df = _normalize_training_hours(pearson_df)
195
- mcc_df = _normalize_training_hours(mcc_df)
196
-
197
- #if "track_name_clean" in pearson_df.columns:
198
- # pearson_df = pearson_df.drop(columns=["datasets"], errors="ignore")
199
- # pearson_df = pearson_df.rename(columns={"track_name_clean": "datasets"})
200
-
201
- # --- Pearson correlations ---
202
- # Expect columns: species, assay_type, datasets, model_name, pearson correlation
203
- pearson_df = pearson_df.rename(
204
- columns={
205
- "model_name": "Model",
206
- "pearson correlation": "Score",
207
- }
208
- )
209
 
210
- # --- Keep track_name_clean available (for head-to-head only later) ---
211
- pearson_track_map = None
212
- if "track_name_clean" in pearson_df.columns:
213
- map_keys = ["species", "datasets"]
214
- if "assay_type" in pearson_df.columns:
215
- map_keys.append("assay_type")
216
 
217
- pearson_track_map = (
218
- pearson_df[map_keys + ["track_name_clean"]]
219
- .dropna(subset=["track_name_clean"])
220
- .drop_duplicates()
221
- )
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
 
 
 
224
  pearson_group_cols = ["species", "datasets", "Model"]
225
- if "assay_type" in pearson_df.columns:
226
  pearson_group_cols.append("assay_type")
227
 
228
- agg_cols = {"Score": "mean"}
229
- if "GPU hours" in pearson_df.columns:
230
- agg_cols["GPU hours"] = "mean"
231
-
232
- # --- after aggregation ---
233
  pearson_df = (
234
- pearson_df
235
  .groupby(pearson_group_cols, as_index=False, dropna=False)
236
- .agg(agg_cols)
 
 
 
 
237
  )
238
 
239
- # ✅ Merge track_name_clean back FIRST (assay_type still raw here)
240
- if pearson_track_map is not None:
241
- pearson_df = pearson_df.merge(pearson_track_map, on=map_keys, how="left")
 
 
242
 
243
- # THEN map assay_type to your categories
 
 
 
 
 
 
 
244
  if "assay_type" in pearson_df.columns:
245
  pearson_df["assay_type"] = (
246
- pearson_df["assay_type"]
247
- .map(ASSAY_TYPE_MAPPING)
248
- .fillna("Other")
249
  )
250
 
251
-
252
-
253
- # --- MCC (bed tracks) ---
254
- # Expect columns: species, datasets, model_name, MCC
255
- mcc_df = mcc_df.rename(
256
- columns={
257
- "model_name": "Model",
258
- "MCC": "Score",
259
- }
260
- )
261
-
262
- # Collapse duplicates with same (species, datasets, Model)
263
- mcc_group_cols = ["species", "datasets", "Model"]
264
- agg_cols = {"Score": "mean"}
265
- if "GPU hours" in mcc_df.columns:
266
- agg_cols["GPU hours"] = "mean"
267
-
268
  mcc_df = (
269
- mcc_df
270
- .groupby(mcc_group_cols, as_index=False, dropna=False)
271
- .agg(agg_cols)
 
 
 
 
272
  )
273
 
274
- # Optional sanity checks
275
- for df_name, df in [("pearson", pearson_df), ("mcc", mcc_df)]:
276
- required = {"species", "datasets", "Model", "Score"}
277
- missing = required - set(df.columns)
278
- if missing:
279
- st.error(f"{df_name} dataframe missing columns: {missing}")
280
-
281
  return pearson_df, mcc_df
282
 
283
 
@@ -510,45 +498,6 @@ def plot_breakdown_facets_sorted_models(
510
 
511
  return fig
512
 
513
-
514
- def build_radar_df(
515
- benchmark_name: str,
516
- selected_species: List[str],
517
- selected_assays: List[str],
518
- selected_models: List[str],
519
- selected_datasets: List[str],
520
- ) -> pd.DataFrame:
521
- cfg = _BENCHMARKS[benchmark_name]
522
-
523
- df = filter_base_df(
524
- benchmark_name,
525
- selected_species,
526
- selected_assays,
527
- selected_models,
528
- selected_datasets,
529
- )
530
-
531
- if df.empty:
532
- return pd.DataFrame()
533
-
534
- # Choose axis column
535
- if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
536
- axis_col = "assay_type"
537
- axis_label = "Assay type"
538
- else:
539
- axis_col = "datasets"
540
- axis_label = "Dataset"
541
-
542
- radar_df = (
543
- df.groupby([axis_col, "Model"], as_index=False)["Score"]
544
- .mean()
545
- .rename(columns={axis_col: "Axis", "Score": "Value"})
546
- )
547
-
548
- radar_df.attrs["axis_label"] = axis_label
549
- return radar_df
550
-
551
-
552
  def build_pairwise_scatter_df(
553
  benchmark_name: str,
554
  selected_species: List[str],
@@ -558,19 +507,8 @@ def build_pairwise_scatter_df(
558
  model_a: str,
559
  model_b: str,
560
  ) -> pd.DataFrame:
561
- """
562
- Returns a per-track dataframe with columns:
563
- Track, Model A, Model B, (optional) species, (optional) assay_type, datasets
564
- Where each row corresponds to a specific track (datasets [+ assay_type]).
565
-
566
- Special case:
567
- If `track_name_clean` exists (typically for bigwig Functional Tracks),
568
- we use it ONLY for the head-to-head "Track" label (and track identity),
569
- while keeping the rest of the app using `datasets`.
570
- """
571
  cfg = _BENCHMARKS[benchmark_name]
572
 
573
- # Ensure chosen models are included even if toggles exclude them
574
  models_for_filter = (
575
  list(set(selected_models + [model_a, model_b]))
576
  if selected_models else [model_a, model_b]
@@ -583,24 +521,18 @@ def build_pairwise_scatter_df(
583
  models_for_filter,
584
  selected_datasets,
585
  )
586
-
587
  if df.empty:
588
  return pd.DataFrame()
589
 
590
- # Prefer track_name_clean for BigWig head-to-head labeling ONLY
591
- # (fallback to datasets if missing)
592
- track_id_col = "track_name_clean" if "track_name_clean" in df.columns else "datasets"
593
-
594
- # Define what “a specific track” means
595
- track_cols = [track_id_col]
596
  if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
597
- track_cols = ["assay_type", track_id_col]
598
 
599
- # Keep species in hover if multiple are selected
600
  keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
601
  id_cols = (["species"] if keep_species else []) + track_cols
602
 
603
- # Pivot into two model columns
604
  wide = (
605
  df[df["Model"].isin([model_a, model_b])]
606
  .pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
@@ -612,26 +544,28 @@ def build_pairwise_scatter_df(
612
 
613
  wide = wide.dropna(subset=[model_a, model_b])
614
 
615
- # Create a nice "Track" label for display (uses track_name_clean if available)
616
  if "assay_type" in wide.columns:
617
- wide["Track"] = wide["assay_type"].astype(str) + " / " + wide[track_id_col].astype(str)
618
  else:
619
- wide["Track"] = wide[track_id_col].astype(str)
620
 
621
- # Rename for plotting
622
  wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
623
 
624
- # If we used track_name_clean, keep datasets around too (if present) for hover/debug
625
- # (nothing breaks if it's absent)
626
- if track_id_col == "track_name_clean" and "datasets" in df.columns and "datasets" not in wide.columns:
627
- # merge back datasets for hover only
628
- merge_keys = id_cols.copy()
629
- extra = df[merge_keys + ["datasets"]].drop_duplicates()
630
- wide = wide.merge(extra, on=merge_keys, how="left")
 
 
631
 
632
  return wide
633
 
634
 
 
635
  def build_violin_df(
636
  benchmark_name: str,
637
  selected_species: List[str],
@@ -655,75 +589,15 @@ def build_violin_df(
655
  return df[keep].copy()
656
 
657
 
658
- def plot_radar(
659
- radar_df: pd.DataFrame,
660
- metric_label: str,
661
- height: int = 600,
662
- ):
663
- if radar_df.empty:
664
- return None
665
-
666
- axes = radar_df["Axis"].unique().tolist()
667
-
668
- # Global radial range
669
- r_min = radar_df["Value"].min()
670
- r_max = radar_df["Value"].max()
671
- pad = 0.05 * (r_max - r_min if r_max > r_min else 1.0)
672
- r_range = [r_min - pad, r_max + pad]
673
-
674
- fig = go.Figure()
675
-
676
- for model in radar_df["Model"].unique():
677
- sub = radar_df[radar_df["Model"] == model]
678
-
679
- # Ensure consistent axis ordering
680
- sub = sub.set_index("Axis").reindex(axes)
681
-
682
- fig.add_trace(
683
- go.Scatterpolar(
684
- r=sub["Value"],
685
- theta=axes,
686
- fill="toself",
687
- name=model,
688
- line_color=MODEL_COLORS.get(model),
689
- opacity=0.75,
690
- )
691
- )
692
-
693
- fig.update_layout(
694
- height=height,
695
- polar=dict(
696
- bgcolor="rgba(0,0,0,0)", # 👈 polar background
697
- radialaxis=dict(
698
- title=metric_label,
699
- range=r_range,
700
- tickformat=".2f",
701
- showgrid=True,
702
- gridcolor="rgba(0,0,0,0.15)", # subtle grid
703
- ),
704
- angularaxis=dict(
705
- showgrid=True,
706
- gridcolor="rgba(0,0,0,0.15)",
707
- ),
708
- ),
709
- paper_bgcolor="rgba(0,0,0,0)", # 👈 entire figure background
710
- plot_bgcolor="rgba(0,0,0,0)", # 👈 plot area
711
- showlegend=True,
712
- legend_title_text="Model",
713
- margin=dict(t=40, b=40, l=40, r=40),
714
- )
715
-
716
-
717
- return fig
718
-
719
-
720
  def build_convergence_df(
721
  benchmark_name: str,
722
  selected_species: List[str],
723
  selected_assays: List[str],
724
  selected_models: List[str],
725
  selected_datasets: List[str],
 
726
  ) -> pd.DataFrame:
 
727
  df = filter_base_df(
728
  benchmark_name,
729
  selected_species,
@@ -732,25 +606,58 @@ def build_convergence_df(
732
  selected_datasets,
733
  )
734
 
735
- if df.empty or "GPU hours" not in df.columns:
736
- return pd.DataFrame(columns=["Model", "GPU hours", "Performance"])
737
 
 
738
  out = (
739
  df.groupby("Model", as_index=False)
740
- .agg({"Score": "mean", "GPU hours": "mean"})
741
  .rename(columns={"Score": "Performance"})
742
  )
743
 
744
- # Apply per-model multiplier (default 1)
745
- out["GPU multiplier"] = out["Model"].map(MODEL_GPU_MULTIPLIER).fillna(1).astype(float)
746
- out["GPU hours"] = out["GPU hours"] * out["GPU multiplier"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
747
 
748
- out = out.dropna(subset=["GPU hours", "Performance"])
749
  out["Performance"] = out["Performance"].round(3)
750
- out["GPU hours"] = out["GPU hours"].round(1)
751
 
752
  return out
753
 
 
754
  # ---------------------------------------------------------------------
755
  # UI helpers
756
  # ---------------------------------------------------------------------
@@ -893,7 +800,7 @@ def main():
893
  )
894
  fig.update_layout(
895
  barmode="group",
896
- height=480,
897
  xaxis_title="",
898
  yaxis_title=cfg["metric_label"],
899
  plot_bgcolor="rgba(0,0,0,0)",
@@ -970,11 +877,9 @@ def main():
970
  pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
971
  axis_range = [min_v - pad, max_v + pad]
972
  tick_step = (axis_range[1] - axis_range[0]) / 5
973
-
974
- hover_cols = []
975
-
976
- # Prefer track_name_clean; fall back to Track if not present
977
- if "track_name_clean" in scatter_df.columns:
978
  hover_cols.append("track_name_clean")
979
  else:
980
  hover_cols.append("datasets")
@@ -1038,49 +943,57 @@ def main():
1038
  with right:
1039
  st.markdown("#### ⏱️ Time to convergence")
1040
 
 
 
 
 
 
 
 
 
1041
  conv_df = build_convergence_df(
1042
  benchmark_name,
1043
  selected_species,
1044
  selected_assays,
1045
  selected_models,
1046
  selected_datasets,
 
1047
  )
1048
 
1049
  if conv_df.empty:
1050
- st.info("No training-time data found for the selected filters (missing 'GPU hours').")
1051
  else:
1052
  fig_conv = px.scatter(
1053
  conv_df,
1054
- x="GPU hours",
1055
  y="Performance",
1056
  text="Model",
1057
- color="Model", # 👈 color by model
1058
- color_discrete_map=MODEL_COLORS, # 👈 your palette
1059
- hover_data=["Model", "GPU hours", "Performance"],
1060
  )
1061
-
1062
- fig_conv.update_traces(textposition="top center")
1063
  fig_conv.update_layout(
1064
- height=630,
1065
- xaxis=dict(title="GPU hours", type="log"),
1066
- yaxis=dict(title=cfg["metric_label"]),
1067
  plot_bgcolor="rgba(0,0,0,0)",
1068
  paper_bgcolor="rgba(0,0,0,0)",
1069
- showlegend=False,
 
 
 
 
1070
  )
1071
 
1072
- fig_conv.update_xaxes(
 
 
1073
  type="log",
1074
- range=[0, np.log10(conv_df["GPU hours"].max())], # log10(1) = 0
1075
- title="GPU hours (log scale)",
1076
  )
1077
-
1078
- # optional: hide legend if labels already on points
1079
- # fig_conv.update_layout(showlegend=False)
1080
-
1081
  st.plotly_chart(fig_conv, use_container_width=True)
1082
 
1083
-
1084
  # ------------------------------------------------------------------
1085
  # Violin (full width, below)
1086
  # ------------------------------------------------------------------
 
82
  "splice acceptor": '#ff9900',
83
  "start codon": '#9933cc',
84
  }
85
+ ASSAY_COLORS["Other"] = "#808080"
86
 
87
  MODEL_COLORS = {
88
  "NTv3 650M (pos)": COLORS['blue_0'],
 
94
  "BPNet arch. 6M": COLORS['cyan_1'],
95
  "Residual CNN 44M": COLORS['magenta_1'],
96
  "PlantCAD2 88M": COLORS["purple_1"],
97
+ "Caduceus 7M": COLORS["purple_2"],
98
+ "HyenaDNA 7M": COLORS["yellow_2"]
99
  }
100
 
101
  MODEL_TRAINING_STATUS = {
 
109
  "NTv2 500M": "PRE",
110
  "BPNet arch. 6M": "SCRATCH",
111
  "PlantCAD2 88M": "PRE",
112
+ "HyenaDNA 7M": "PRE"
113
  }
114
 
115
  MODEL_GPU_MULTIPLIER = {
 
155
  PROJECT_ROOT = os.path.dirname(HERE) # /app
156
  DATA_DIR = os.path.join(PROJECT_ROOT, "data")
157
 
158
+ SINGLE_TABLE_PATH = os.path.join(DATA_DIR, "ntv3_benchmark_results.csv")
 
159
 
160
  # ---------------------------------------------------------------------
161
  # Data loading & preprocessing
 
164
 
165
  @st.cache_data
166
  def load_raw_data():
167
+ df = pd.read_csv(SINGLE_TABLE_PATH)
168
+ df.columns = [c.strip() for c in df.columns]
169
+ return df
170
 
 
 
171
 
172
+ def _normalize_training_time_to_gpu_hours(df: pd.DataFrame) -> pd.DataFrame:
173
+ """
174
+ Your new column is `running_time`. In your sample it looks like seconds
175
+ (e.g. 317034 ~= 88 hours). We'll convert to hours if values look like seconds.
176
+ """
177
+ if "running_time" not in df.columns:
178
+ return df
179
+
180
+ rt = pd.to_numeric(df["running_time"], errors="coerce")
181
+ # Heuristic: if median is huge, it's probably seconds -> convert to hours
182
+ # (88 hours = 316800 seconds is a typical-looking value in your sample)
183
+ if rt.dropna().median() > 10_000:
184
+ df["GPU hours"] = rt / 3600.0
185
+ else:
186
+ df["GPU hours"] = rt.astype(float)
187
 
188
+ return df
 
189
 
190
 
191
+ def _best_step_time_to_hours(s: pd.Series) -> pd.Series:
 
192
  """
193
+ Converts strings like '3 days 04:26:26.467000' to hours (float).
194
+ Works with pandas Timedelta parsing.
 
 
 
 
 
 
 
 
 
 
195
  """
196
+ td = pd.to_timedelta(s, errors="coerce")
197
+ return td.dt.total_seconds() / 3600.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
 
 
 
 
 
 
199
 
 
 
 
 
 
200
 
201
@st.cache_data
def load_expanded_data():
    """Load the benchmark table and split it into the two per-benchmark frames.

    The raw table comes from ``load_raw_data``. ``Metric`` / ``model_name`` are
    renamed to ``Score`` / ``Model`` and ``Score`` is coerced to numeric. Rows
    whose ``assay_type`` equals ``"Annotation"`` feed the genome-annotation
    (MCC) frame; every other row feeds the functional-tracks (Pearson) frame.
    When the table has no ``assay_type`` column, all rows go to the Pearson
    frame.

    Returns:
        ``(pearson_df, mcc_df)``. Both hold one row per group with the mean
        ``Score``, ``best_step`` and ``best_step_time_hours``. ``pearson_df``
        is grouped by species / datasets / Model (plus ``assay_type`` when
        present, mapped through ``ASSAY_TYPE_MAPPING`` with unmapped values
        becoming ``"Other"``) and carries ``track_name_clean`` when the raw
        table provides it. ``mcc_df`` is grouped by species / datasets / Model.
    """
    df = load_raw_data().copy()

    df = df.rename(columns={"Metric": "Score", "model_name": "Model"})
    df["Score"] = pd.to_numeric(df["Score"], errors="coerce")

    # ``best_step`` is optional; keep an all-NaN column when it is absent so
    # the aggregations below do not fail with a KeyError.
    if "best_step" in df.columns:
        df["best_step"] = pd.to_numeric(df["best_step"], errors="coerce")
    else:
        df["best_step"] = np.nan

    if "best_step_time" in df.columns:
        df["best_step_time_hours"] = _best_step_time_to_hours(df["best_step_time"])
    else:
        df["best_step_time_hours"] = np.nan

    # Build the mask explicitly: ``df.get("assay_type", "")`` would hand back a
    # plain string (no ``.astype``) when the column is missing.
    has_assay_type = "assay_type" in df.columns
    if has_assay_type:
        is_annot = df["assay_type"].astype(str).eq("Annotation")
    else:
        is_annot = pd.Series(False, index=df.index)

    pearson_raw = df[~is_annot].copy()
    mcc_raw = df[is_annot].copy()

    mean_spec = {
        "Score": "mean",
        "best_step": "mean",
        "best_step_time_hours": "mean",
    }

    # -------------------------
    # Functional Tracks (Pearson)
    # -------------------------
    pearson_group_cols = ["species", "datasets", "Model"]
    if has_assay_type:
        pearson_group_cols.append("assay_type")

    pearson_df = (
        pearson_raw
        .groupby(pearson_group_cols, as_index=False, dropna=False)
        .agg(mean_spec)
    )

    # Attach track_name_clean while assay_type still holds its raw values, so
    # the merge keys match the raw table.
    if "track_name_clean" in pearson_raw.columns:
        map_keys = ["species", "datasets"]
        if has_assay_type:
            map_keys.append("assay_type")

        track_map = (
            pearson_raw[map_keys + ["track_name_clean"]]
            .dropna(subset=["track_name_clean"])
            # One name per key: de-duplicating on the keys keeps the left
            # merge from multiplying aggregated rows.
            .drop_duplicates(subset=map_keys)
        )
        pearson_df = pearson_df.merge(track_map, on=map_keys, how="left")

    # Only now collapse raw assay types into display categories.
    if "assay_type" in pearson_df.columns:
        pearson_df["assay_type"] = (
            pearson_df["assay_type"].astype(str).map(ASSAY_TYPE_MAPPING).fillna("Other")
        )

    # -------------------------
    # Genome Annotation (MCC)
    # -------------------------
    mcc_df = (
        mcc_raw
        .groupby(["species", "datasets", "Model"], as_index=False, dropna=False)
        .agg(mean_spec)
    )

    return pearson_df, mcc_df
270
 
271
 
 
498
 
499
  return fig
500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  def build_pairwise_scatter_df(
502
  benchmark_name: str,
503
  selected_species: List[str],
 
507
  model_a: str,
508
  model_b: str,
509
  ) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
510
  cfg = _BENCHMARKS[benchmark_name]
511
 
 
512
  models_for_filter = (
513
  list(set(selected_models + [model_a, model_b]))
514
  if selected_models else [model_a, model_b]
 
521
  models_for_filter,
522
  selected_datasets,
523
  )
 
524
  if df.empty:
525
  return pd.DataFrame()
526
 
527
+ # ---- define "track identity" for head-to-head ----
528
+ # Always use datasets for the identity (x/y points)
529
+ track_cols = ["datasets"]
 
 
 
530
  if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
531
+ track_cols = ["assay_type", "datasets"]
532
 
 
533
  keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
534
  id_cols = (["species"] if keep_species else []) + track_cols
535
 
 
536
  wide = (
537
  df[df["Model"].isin([model_a, model_b])]
538
  .pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
 
544
 
545
  wide = wide.dropna(subset=[model_a, model_b])
546
 
547
+ # Nice display label: use datasets (not track_name_clean)
548
  if "assay_type" in wide.columns:
549
+ wide["Track"] = wide["assay_type"].astype(str) + " / " + wide["datasets"].astype(str)
550
  else:
551
+ wide["Track"] = wide["datasets"].astype(str)
552
 
 
553
  wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
554
 
555
+ # ---- Pearson-only: merge track_name_clean for hover ----
556
+ if benchmark_name == "Functional Tracks" and "track_name_clean" in df.columns:
557
+ merge_keys = id_cols.copy() # species? + assay_type? + datasets
558
+ track_map = (
559
+ df[merge_keys + ["track_name_clean"]]
560
+ .dropna(subset=["track_name_clean"])
561
+ .drop_duplicates()
562
+ )
563
+ wide = wide.merge(track_map, on=merge_keys, how="left")
564
 
565
  return wide
566
 
567
 
568
+
569
  def build_violin_df(
570
  benchmark_name: str,
571
  selected_species: List[str],
 
589
  return df[keep].copy()
590
 
591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  def build_convergence_df(
593
  benchmark_name: str,
594
  selected_species: List[str],
595
  selected_assays: List[str],
596
  selected_models: List[str],
597
  selected_datasets: List[str],
598
+ x_mode: str = "best_step", # "best_step" | "best_step_time"
599
  ) -> pd.DataFrame:
600
+
601
  df = filter_base_df(
602
  benchmark_name,
603
  selected_species,
 
606
  selected_datasets,
607
  )
608
 
609
+ if df.empty:
610
+ return pd.DataFrame(columns=["Model", "X", "Performance"])
611
 
612
+ # Mean performance per model
613
  out = (
614
  df.groupby("Model", as_index=False)
615
+ .agg({"Score": "mean"})
616
  .rename(columns={"Score": "Performance"})
617
  )
618
 
619
+ # -------------------------
620
+ # X axis selection
621
+ # -------------------------
622
+ if x_mode == "Steps":
623
+ if "best_step" not in df.columns:
624
+ return pd.DataFrame(columns=["Model", "X", "Performance"])
625
+
626
+ x = (
627
+ df.groupby("Model", as_index=False)["best_step"]
628
+ .mean()
629
+ .rename(columns={"best_step": "X"})
630
+ )
631
+
632
+ else: # best_step_time (GPU hours)
633
+ if "best_step_time_hours" not in df.columns:
634
+ return pd.DataFrame(columns=["Model", "X", "Performance"])
635
+
636
+ x = (
637
+ df.groupby("Model", as_index=False)["best_step_time_hours"]
638
+ .mean()
639
+ .rename(columns={"best_step_time_hours": "X"})
640
+ )
641
+
642
+ # 👇 Apply GPU multiplier (Evo2 uses 8 GPUs)
643
+ gpu_multiplier = {
644
+ "Evo2 1B": 8,
645
+ }
646
+
647
+ x["X"] = x.apply(
648
+ lambda r: r["X"] * gpu_multiplier.get(r["Model"], 1),
649
+ axis=1,
650
+ )
651
+
652
+ # Merge + clean
653
+ out = out.merge(x, on="Model", how="left")
654
+ out = out.dropna(subset=["X", "Performance"])
655
 
 
656
  out["Performance"] = out["Performance"].round(3)
 
657
 
658
  return out
659
 
660
+
661
  # ---------------------------------------------------------------------
662
  # UI helpers
663
  # ---------------------------------------------------------------------
 
800
  )
801
  fig.update_layout(
802
  barmode="group",
803
+ height=500,
804
  xaxis_title="",
805
  yaxis_title=cfg["metric_label"],
806
  plot_bgcolor="rgba(0,0,0,0)",
 
877
  pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
878
  axis_range = [min_v - pad, max_v + pad]
879
  tick_step = (axis_range[1] - axis_range[0]) / 5
880
+
881
+ hover_cols = ["datasets"]
882
+ if benchmark_name == "Functional Tracks" and "track_name_clean" in scatter_df.columns:
 
 
883
  hover_cols.append("track_name_clean")
884
  else:
885
  hover_cols.append("datasets")
 
943
  with right:
944
  st.markdown("#### ⏱️ Time to convergence")
945
 
946
+ x_mode = st.selectbox(
947
+ "X-axis",
948
+ options=["GPU hours", "Steps"],
949
+ index=0,
950
+ key=f"conv_x_mode_{benchmark_name}",
951
+ )
952
+
953
+
954
  conv_df = build_convergence_df(
955
  benchmark_name,
956
  selected_species,
957
  selected_assays,
958
  selected_models,
959
  selected_datasets,
960
+ x_mode=x_mode,
961
  )
962
 
963
  if conv_df.empty:
964
+ st.info("No convergence data found for the selected filters / x-axis mode.")
965
  else:
966
  fig_conv = px.scatter(
967
  conv_df,
968
+ x="X",
969
  y="Performance",
970
  text="Model",
971
+ color="Model",
972
+ color_discrete_map=MODEL_COLORS,
973
+ hover_data=["Model", "X", "Performance"],
974
  )
 
 
975
  fig_conv.update_layout(
976
+ height=550,
977
+ xaxis_title=("GPU hours" if x_mode == "GPU hours" else x_mode),
978
+ yaxis_title=cfg["metric_label"],
979
  plot_bgcolor="rgba(0,0,0,0)",
980
  paper_bgcolor="rgba(0,0,0,0)",
981
+ showlegend=False, # ✅ no legend
982
+ )
983
+ fig_conv.update_traces(
984
+ marker=dict(size=14), # 👈 bigger dots
985
+ textposition="top center",
986
  )
987
 
988
+ # Log scale only makes sense for hours (and sometimes best_step)
989
+ if x_mode in ["GPU hours"]:
990
+ fig_conv.update_xaxes(
991
  type="log",
992
+ dtick=1,
993
+ minor=dict(ticks="", showgrid=False),
994
  )
 
 
 
 
995
  st.plotly_chart(fig_conv, use_container_width=True)
996
 
 
997
  # ------------------------------------------------------------------
998
  # Violin (full width, below)
999
  # ------------------------------------------------------------------