Spaces:
Running
Running
feat: use bernardo's dataset and fix scatterplot
Browse files- data/bed_dataset.csv +0 -334
- data/bigwig_dataset.csv +0 -0
- data/ntv3_benchmark_results.csv +0 -0
- src/streamlit_app.py +172 -259
data/bed_dataset.csv
DELETED
|
@@ -1,334 +0,0 @@
|
|
| 1 |
-
MCC,model_name,running_time_hours,species,datasets
|
| 2 |
-
0.334637850522995,NTv2 500M,88.0,cattle,intron
|
| 3 |
-
0.1238768473267555,BPNet arch. 6M,4.0,cattle,intron
|
| 4 |
-
0.383470207452774,Residual CNN 44M,19.0,cattle,intron
|
| 5 |
-
0.3828243613243103,HyenaDNA 7M,23.0,cattle,intron
|
| 6 |
-
0.4733810424804687,Caduceus 7M,32.0,cattle,intron
|
| 7 |
-
0.4315277338027954,Evo2 1B,43.0,cattle,intron
|
| 8 |
-
0.5455867648124695,NTv3 8M (pre),1.0,cattle,intron
|
| 9 |
-
0.5453664064407349,NTv3 100M (pre),2.0,cattle,intron
|
| 10 |
-
0.5628412365913391,NTv3 650M (pre),5.0,cattle,intron
|
| 11 |
-
0.5682631134986877,NTv3 650M (pos),7.0,cattle,intron
|
| 12 |
-
0.3689357042312622,NTv2 500M,87.0,cattle,exon
|
| 13 |
-
0.3250860869884491,BPNet arch. 6M,4.0,cattle,exon
|
| 14 |
-
0.4674676060676574,Residual CNN 44M,19.0,cattle,exon
|
| 15 |
-
0.2207767516374588,HyenaDNA 7M,21.0,cattle,exon
|
| 16 |
-
0.4960922300815582,Caduceus 7M,32.0,cattle,exon
|
| 17 |
-
0.4969632029533386,Evo2 1B,44.0,cattle,exon
|
| 18 |
-
0.5432836413383484,NTv3 8M (pre),1.0,cattle,exon
|
| 19 |
-
0.5531933307647705,NTv3 100M (pre),2.0,cattle,exon
|
| 20 |
-
0.591151773929596,NTv3 650M (pre),5.0,cattle,exon
|
| 21 |
-
0.6253225207328796,NTv3 650M (pos),7.0,cattle,exon
|
| 22 |
-
0.118808165192604,NTv2 500M,86.0,cattle,splice acceptor
|
| 23 |
-
0.4715546369552612,BPNet arch. 6M,4.0,cattle,splice acceptor
|
| 24 |
-
0.6620649099349976,Residual CNN 44M,19.0,cattle,splice acceptor
|
| 25 |
-
0.104436807334423,HyenaDNA 7M,22.0,cattle,splice acceptor
|
| 26 |
-
0.7064619660377502,Caduceus 7M,30.0,cattle,splice acceptor
|
| 27 |
-
0.2085049450397491,Evo2 1B,43.0,cattle,splice acceptor
|
| 28 |
-
0.7254849076271057,NTv3 8M (pre),1.0,cattle,splice acceptor
|
| 29 |
-
0.7404072880744934,NTv3 100M (pre),2.0,cattle,splice acceptor
|
| 30 |
-
0.7732946872711182,NTv3 650M (pre),5.0,cattle,splice acceptor
|
| 31 |
-
0.7679624557495117,NTv3 650M (pos),7.0,cattle,splice acceptor
|
| 32 |
-
0.1412438601255417,NTv2 500M,88.0,cattle,start codon
|
| 33 |
-
0.1490814685821533,BPNet arch. 6M,4.0,cattle,start codon
|
| 34 |
-
0.3243320286273956,Residual CNN 44M,19.0,cattle,start codon
|
| 35 |
-
0.056509330868721,HyenaDNA 7M,23.0,cattle,start codon
|
| 36 |
-
0.3455557227134704,Caduceus 7M,33.0,cattle,start codon
|
| 37 |
-
0.1030694246292114,Evo2 1B,43.0,cattle,start codon
|
| 38 |
-
0.5275959968566895,NTv3 8M (pre),1.0,cattle,start codon
|
| 39 |
-
0.4962065815925598,NTv3 100M (pre),2.0,cattle,start codon
|
| 40 |
-
0.5591813921928406,NTv3 650M (pre),5.0,cattle,start codon
|
| 41 |
-
0.5492052435874939,NTv3 650M (pos),7.0,cattle,start codon
|
| 42 |
-
0.0383123345673084,NTv2 500M,90.0,cattle,intron
|
| 43 |
-
0.1015273928642273,BPNet arch. 6M,7.0,cattle,intron
|
| 44 |
-
0.3299930691719055,Residual CNN 44M,23.0,cattle,intron
|
| 45 |
-
0.3826011121273041,HyenaDNA 7M,20.0,cattle,intron
|
| 46 |
-
0.5564854741096497,Caduceus 7M,36.0,cattle,intron
|
| 47 |
-
0.5645747780799866,NTv3 8M (pre),2.0,cattle,intron
|
| 48 |
-
0.5765650272369385,NTv3 100M (pre),2.0,cattle,intron
|
| 49 |
-
0.6140890121459961,NTv3 650M (pre),7.0,cattle,intron
|
| 50 |
-
0.6709504723548889,NTv3 650M (pos),10.0,cattle,intron
|
| 51 |
-
0.3665516376495361,NTv2 500M,88.0,cattle,exon
|
| 52 |
-
0.323502242565155,BPNet arch. 6M,7.0,cattle,exon
|
| 53 |
-
0.519285261631012,Residual CNN 44M,23.0,cattle,exon
|
| 54 |
-
0.2514283955097198,HyenaDNA 7M,23.0,cattle,exon
|
| 55 |
-
0.5072187781333923,Caduceus 7M,39.0,cattle,exon
|
| 56 |
-
0.593974232673645,NTv3 8M (pre),1.0,cattle,exon
|
| 57 |
-
0.6014777421951294,NTv3 100M (pre),2.0,cattle,exon
|
| 58 |
-
0.6433462500572205,NTv3 650M (pre),7.0,cattle,exon
|
| 59 |
-
0.6648420095443726,NTv3 650M (pos),9.0,cattle,exon
|
| 60 |
-
0.0937248468399047,NTv2 500M,89.0,cattle,splice acceptor
|
| 61 |
-
0.4435675740242004,BPNet arch. 6M,7.0,cattle,splice acceptor
|
| 62 |
-
0.6590774655342102,Residual CNN 44M,23.0,cattle,splice acceptor
|
| 63 |
-
0.1038060635328292,HyenaDNA 7M,21.0,cattle,splice acceptor
|
| 64 |
-
0.6937510371208191,Caduceus 7M,38.0,cattle,splice acceptor
|
| 65 |
-
0.7248824238777161,NTv3 8M (pre),1.0,cattle,splice acceptor
|
| 66 |
-
0.7345820069313049,NTv3 100M (pre),2.0,cattle,splice acceptor
|
| 67 |
-
0.7439091801643372,NTv3 650M (pre),7.0,cattle,splice acceptor
|
| 68 |
-
0.758992075920105,NTv3 650M (pos),9.0,cattle,splice acceptor
|
| 69 |
-
0.1127461418509483,NTv2 500M,88.0,cattle,start codon
|
| 70 |
-
0.0901669710874557,BPNet arch. 6M,7.0,cattle,start codon
|
| 71 |
-
0.3548502624034881,Residual CNN 44M,23.0,cattle,start codon
|
| 72 |
-
0.0545537285506725,HyenaDNA 7M,24.0,cattle,start codon
|
| 73 |
-
0.4038819670677185,Caduceus 7M,38.0,cattle,start codon
|
| 74 |
-
0.5045616030693054,NTv3 8M (pre),1.0,cattle,start codon
|
| 75 |
-
0.4762806594371795,NTv3 100M (pre),3.0,cattle,start codon
|
| 76 |
-
0.5610686540603638,NTv3 650M (pre),7.0,cattle,start codon
|
| 77 |
-
0.5782408118247986,NTv3 650M (pos),9.0,cattle,start codon
|
| 78 |
-
0.1547228246927261,NTv2 500M,85.0,cattle,intron
|
| 79 |
-
0.1383400112390518,BPNet arch. 6M,6.0,cattle,intron
|
| 80 |
-
0.3266464471817016,Residual CNN 44M,23.0,cattle,intron
|
| 81 |
-
0.4240079522132873,HyenaDNA 7M,23.0,cattle,intron
|
| 82 |
-
0.4552704095840454,Caduceus 7M,37.0,cattle,intron
|
| 83 |
-
0.5063548684120178,NTv3 8M (pre),1.0,cattle,intron
|
| 84 |
-
0.5619235038757324,NTv3 100M (pre),3.0,cattle,intron
|
| 85 |
-
0.531277596950531,NTv3 650M (pre),7.0,cattle,intron
|
| 86 |
-
0.6205132603645325,NTv3 650M (pos),9.0,cattle,intron
|
| 87 |
-
0.3413117229938507,NTv2 500M,87.0,cattle,exon
|
| 88 |
-
0.2900931537151336,BPNet arch. 6M,7.0,cattle,exon
|
| 89 |
-
0.4856111407279968,Residual CNN 44M,23.0,cattle,exon
|
| 90 |
-
0.2246854901313781,HyenaDNA 7M,70.0,cattle,exon
|
| 91 |
-
0.5370016098022461,Caduceus 7M,35.0,cattle,exon
|
| 92 |
-
0.5721412897109985,NTv3 8M (pre),2.0,cattle,exon
|
| 93 |
-
0.5819903612136841,NTv3 100M (pre),2.0,cattle,exon
|
| 94 |
-
0.6183731555938721,NTv3 650M (pre),7.0,cattle,exon
|
| 95 |
-
0.6233119964599609,NTv3 650M (pos),9.0,cattle,exon
|
| 96 |
-
0.1367750316858291,NTv2 500M,89.0,cattle,splice acceptor
|
| 97 |
-
0.4220209121704101,BPNet arch. 6M,7.0,cattle,splice acceptor
|
| 98 |
-
0.689546525478363,Residual CNN 44M,23.0,cattle,splice acceptor
|
| 99 |
-
0.1121769621968269,HyenaDNA 7M,69.0,cattle,splice acceptor
|
| 100 |
-
0.7314619421958923,Caduceus 7M,37.0,cattle,splice acceptor
|
| 101 |
-
0.74350905418396,NTv3 8M (pre),2.0,cattle,splice acceptor
|
| 102 |
-
0.746654748916626,NTv3 100M (pre),2.0,cattle,splice acceptor
|
| 103 |
-
0.7714020609855652,NTv3 650M (pre),7.0,cattle,splice acceptor
|
| 104 |
-
0.7809271812438965,NTv3 650M (pos),9.0,cattle,splice acceptor
|
| 105 |
-
0.0901266038417816,NTv2 500M,89.0,cattle,start codon
|
| 106 |
-
0.0930091217160224,BPNet arch. 6M,6.0,cattle,start codon
|
| 107 |
-
0.423166275024414,Residual CNN 44M,23.0,cattle,start codon
|
| 108 |
-
0.1253955662250518,HyenaDNA 7M,72.0,cattle,start codon
|
| 109 |
-
0.33419930934906,Caduceus 7M,37.0,cattle,start codon
|
| 110 |
-
0.4639334082603454,NTv3 8M (pre),1.0,cattle,start codon
|
| 111 |
-
0.5102551579475403,NTv3 100M (pre),2.0,cattle,start codon
|
| 112 |
-
0.5866840481758118,NTv3 650M (pre),7.0,cattle,start codon
|
| 113 |
-
0.588148832321167,NTv3 650M (pos),9.0,cattle,start codon
|
| 114 |
-
0.4777896404266357,NTv2 500M,33.0,tomato,intron
|
| 115 |
-
0.3216900527477264,BPNet arch. 6M,1.0,tomato,intron
|
| 116 |
-
0.46840900182724,Residual CNN 44M,6.0,tomato,intron
|
| 117 |
-
0.5251263380050659,PlantCAD2 88M,38.0,tomato,intron
|
| 118 |
-
0.747674286365509,Evo2 1B,13.0,tomato,intron
|
| 119 |
-
0.6858112812042236,NTv3 8M (pre),0.0,tomato,intron
|
| 120 |
-
0.7038365006446838,NTv3 100M (pre),0.0,tomato,intron
|
| 121 |
-
0.7481895685195923,NTv3 650M (pre),1.0,tomato,intron
|
| 122 |
-
0.7458349466323853,NTv3 650M (pos),2.0,tomato,intron
|
| 123 |
-
0.6147475838661194,NTv2 500M,33.0,tomato,exon
|
| 124 |
-
0.4551227986812591,BPNet arch. 6M,1.0,tomato,exon
|
| 125 |
-
0.5068296194076538,Residual CNN 44M,6.0,tomato,exon
|
| 126 |
-
0.7256030440330505,PlantCAD2 88M,37.0,tomato,exon
|
| 127 |
-
0.7006198763847351,Evo2 1B,14.0,tomato,exon
|
| 128 |
-
0.7537696361541748,NTv3 8M (pre),0.0,tomato,exon
|
| 129 |
-
0.7484462857246399,NTv3 100M (pre),0.0,tomato,exon
|
| 130 |
-
0.764011561870575,NTv3 650M (pre),1.0,tomato,exon
|
| 131 |
-
0.7750575542449951,NTv3 650M (pos),2.0,tomato,exon
|
| 132 |
-
0.1691933125257492,NTv2 500M,33.0,tomato,splice acceptor
|
| 133 |
-
0.125656172633171,BPNet arch. 6M,1.0,tomato,splice acceptor
|
| 134 |
-
0.4359458982944488,Residual CNN 44M,6.0,tomato,splice acceptor
|
| 135 |
-
0.744257926940918,PlantCAD2 88M,38.0,tomato,splice acceptor
|
| 136 |
-
0.3791649639606476,Evo2 1B,13.0,tomato,splice acceptor
|
| 137 |
-
0.6623862385749817,NTv3 8M (pre),0.0,tomato,splice acceptor
|
| 138 |
-
0.6843105554580688,NTv3 100M (pre),0.0,tomato,splice acceptor
|
| 139 |
-
0.7641868591308594,NTv3 650M (pre),1.0,tomato,splice acceptor
|
| 140 |
-
0.7584431767463684,NTv3 650M (pos),2.0,tomato,splice acceptor
|
| 141 |
-
0.132934883236885,NTv2 500M,34.0,tomato,start codon
|
| 142 |
-
0.0,BPNet arch. 6M,1.0,tomato,start codon
|
| 143 |
-
0.088478960096836,Residual CNN 44M,6.0,tomato,start codon
|
| 144 |
-
0.2019559442996978,PlantCAD2 88M,38.0,tomato,start codon
|
| 145 |
-
0.1622217148542404,Evo2 1B,13.0,tomato,start codon
|
| 146 |
-
0.2966536581516266,NTv3 8M (pre),0.0,tomato,start codon
|
| 147 |
-
0.3968957066535949,NTv3 100M (pre),0.0,tomato,start codon
|
| 148 |
-
0.4830105900764465,NTv3 650M (pre),1.0,tomato,start codon
|
| 149 |
-
0.5007501244544983,NTv3 650M (pos),2.0,tomato,start codon
|
| 150 |
-
0.6770024299621582,NTv2 500M,33.0,tomato,intron
|
| 151 |
-
0.2927957773208618,BPNet arch. 6M,2.0,tomato,intron
|
| 152 |
-
0.557494580745697,Residual CNN 44M,8.0,tomato,intron
|
| 153 |
-
0.7252154350280762,PlantCAD2 88M,46.0,tomato,intron
|
| 154 |
-
0.712181031703949,NTv3 8M (pre),1.0,tomato,intron
|
| 155 |
-
0.7515084147453308,NTv3 100M (pre),1.0,tomato,intron
|
| 156 |
-
0.7400797009468079,NTv3 650M (pre),3.0,tomato,intron
|
| 157 |
-
0.7532288432121277,NTv3 650M (pos),4.0,tomato,intron
|
| 158 |
-
0.5751976370811462,NTv2 500M,33.0,tomato,exon
|
| 159 |
-
0.3057552278041839,BPNet arch. 6M,3.0,tomato,exon
|
| 160 |
-
0.5581462979316711,Residual CNN 44M,8.0,tomato,exon
|
| 161 |
-
0.7699167728424072,PlantCAD2 88M,50.0,tomato,exon
|
| 162 |
-
0.748009443283081,NTv3 8M (pre),1.0,tomato,exon
|
| 163 |
-
0.7629056572914124,NTv3 100M (pre),1.0,tomato,exon
|
| 164 |
-
0.7755228877067566,NTv3 650M (pre),3.0,tomato,exon
|
| 165 |
-
0.782516598701477,NTv3 650M (pos),4.0,tomato,exon
|
| 166 |
-
0.168193981051445,NTv2 500M,33.0,tomato,splice acceptor
|
| 167 |
-
0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
|
| 168 |
-
0.4833243191242218,Residual CNN 44M,8.0,tomato,splice acceptor
|
| 169 |
-
0.7335307598114014,PlantCAD2 88M,46.0,tomato,splice acceptor
|
| 170 |
-
0.6908777952194214,NTv3 8M (pre),1.0,tomato,splice acceptor
|
| 171 |
-
0.7348777055740356,NTv3 100M (pre),1.0,tomato,splice acceptor
|
| 172 |
-
0.7484620809555054,NTv3 650M (pre),3.0,tomato,splice acceptor
|
| 173 |
-
0.7539154291152954,NTv3 650M (pos),4.0,tomato,splice acceptor
|
| 174 |
-
0.1586925536394119,NTv2 500M,33.0,tomato,start codon
|
| 175 |
-
0.0,BPNet arch. 6M,2.0,tomato,start codon
|
| 176 |
-
0.1107296794652938,Residual CNN 44M,8.0,tomato,start codon
|
| 177 |
-
0.3756755590438843,PlantCAD2 88M,48.0,tomato,start codon
|
| 178 |
-
0.4113904237747192,NTv3 8M (pre),1.0,tomato,start codon
|
| 179 |
-
0.4541433155536651,NTv3 100M (pre),1.0,tomato,start codon
|
| 180 |
-
0.5002310872077942,NTv3 650M (pre),3.0,tomato,start codon
|
| 181 |
-
0.5470007658004761,NTv3 650M (pos),4.0,tomato,start codon
|
| 182 |
-
0.6712294220924377,NTv2 500M,33.0,tomato,intron
|
| 183 |
-
0.3502058088779449,BPNet arch. 6M,2.0,tomato,intron
|
| 184 |
-
0.5514466166496277,Residual CNN 44M,8.0,tomato,intron
|
| 185 |
-
0.722817599773407,PlantCAD2 88M,88.0,tomato,intron
|
| 186 |
-
0.7013162970542908,NTv3 8M (pre),1.0,tomato,intron
|
| 187 |
-
0.747364342212677,NTv3 100M (pre),1.0,tomato,intron
|
| 188 |
-
0.752423107624054,NTv3 650M (pre),3.0,tomato,intron
|
| 189 |
-
0.7750566005706787,NTv3 650M (pos),4.0,tomato,intron
|
| 190 |
-
0.6022632718086243,NTv2 500M,33.0,tomato,exon
|
| 191 |
-
0.3020758032798767,BPNet arch. 6M,2.0,tomato,exon
|
| 192 |
-
0.4746756553649902,Residual CNN 44M,8.0,tomato,exon
|
| 193 |
-
0.7354215979576111,PlantCAD2 88M,45.0,tomato,exon
|
| 194 |
-
0.7157281041145325,NTv3 8M (pre),1.0,tomato,exon
|
| 195 |
-
0.7326820492744446,NTv3 100M (pre),1.0,tomato,exon
|
| 196 |
-
0.7308483123779297,NTv3 650M (pre),3.0,tomato,exon
|
| 197 |
-
0.7417197823524475,NTv3 650M (pos),4.0,tomato,exon
|
| 198 |
-
0.1558358669281005,NTv2 500M,33.0,tomato,splice acceptor
|
| 199 |
-
0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
|
| 200 |
-
0.3391502797603607,Residual CNN 44M,8.0,tomato,splice acceptor
|
| 201 |
-
0.7305923700332642,PlantCAD2 88M,85.0,tomato,splice acceptor
|
| 202 |
-
0.6977006196975708,NTv3 8M (pre),1.0,tomato,splice acceptor
|
| 203 |
-
0.6770275831222534,NTv3 100M (pre),1.0,tomato,splice acceptor
|
| 204 |
-
0.6770390272140503,NTv3 650M (pre),3.0,tomato,splice acceptor
|
| 205 |
-
0.7287323474884033,NTv3 650M (pos),4.0,tomato,splice acceptor
|
| 206 |
-
0.1887903958559036,NTv2 500M,33.0,tomato,start codon
|
| 207 |
-
0.0639578104019165,BPNet arch. 6M,2.0,tomato,start codon
|
| 208 |
-
0.0914037525653839,Residual CNN 44M,8.0,tomato,start codon
|
| 209 |
-
0.4881043434143066,PlantCAD2 88M,88.0,tomato,start codon
|
| 210 |
-
0.4309621453285217,NTv3 8M (pre),1.0,tomato,start codon
|
| 211 |
-
0.4028272926807403,NTv3 100M (pre),1.0,tomato,start codon
|
| 212 |
-
0.4060510396957397,NTv3 650M (pre),3.0,tomato,start codon
|
| 213 |
-
0.472331553697586,NTv3 650M (pos),4.0,tomato,start codon
|
| 214 |
-
0.1995969861745834,NTv2 500M,72.0,human,intron
|
| 215 |
-
0.0296161584556102,BPNet arch. 6M,3.0,human,intron
|
| 216 |
-
0.2347834408283233,Residual CNN 44M,15.0,human,intron
|
| 217 |
-
0.33451908826828,HyenaDNA 7M,17.0,human,intron
|
| 218 |
-
0.4144788980484009,Caduceus 7M,27.0,human,intron
|
| 219 |
-
0.0,Evo2 1B,34.0,human,intron
|
| 220 |
-
0.4695742726325989,NTv3 8M (pre),1.0,human,intron
|
| 221 |
-
0.475054919719696,NTv3 100M (pre),2.0,human,intron
|
| 222 |
-
0.5504136681556702,NTv3 650M (pre),5.0,human,intron
|
| 223 |
-
0.5643875002861023,NTv3 650M (pos),6.0,human,intron
|
| 224 |
-
0.0546500161290168,NTv2 500M,72.0,human,exon
|
| 225 |
-
0.2706590592861175,BPNet arch. 6M,3.0,human,exon
|
| 226 |
-
0.2678671479225158,Residual CNN 44M,15.0,human,exon
|
| 227 |
-
0.179698497056961,HyenaDNA 7M,19.0,human,exon
|
| 228 |
-
0.5098947286605835,Caduceus 7M,26.0,human,exon
|
| 229 |
-
0.4510694444179535,Evo2 1B,34.0,human,exon
|
| 230 |
-
0.6089931726455688,NTv3 8M (pre),1.0,human,exon
|
| 231 |
-
0.6492856740951538,NTv3 100M (pre),2.0,human,exon
|
| 232 |
-
0.6975767016410828,NTv3 650M (pre),5.0,human,exon
|
| 233 |
-
0.6822624206542969,NTv3 650M (pos),8.0,human,exon
|
| 234 |
-
0.1493269056081771,NTv2 500M,73.0,human,splice acceptor
|
| 235 |
-
0.3807527124881744,BPNet arch. 6M,3.0,human,splice acceptor
|
| 236 |
-
0.6632664203643799,Residual CNN 44M,15.0,human,splice acceptor
|
| 237 |
-
0.1002769619226455,HyenaDNA 7M,17.0,human,splice acceptor
|
| 238 |
-
0.7357247471809387,Caduceus 7M,24.0,human,splice acceptor
|
| 239 |
-
0.1821079105138778,Evo2 1B,34.0,human,splice acceptor
|
| 240 |
-
0.7726271748542786,NTv3 8M (pre),1.0,human,splice acceptor
|
| 241 |
-
0.77947598695755,NTv3 100M (pre),2.0,human,splice acceptor
|
| 242 |
-
0.8028115034103394,NTv3 650M (pre),5.0,human,splice acceptor
|
| 243 |
-
0.7979229092597961,NTv3 650M (pos),7.0,human,splice acceptor
|
| 244 |
-
0.139576569199562,NTv2 500M,73.0,human,start codon
|
| 245 |
-
0.1334401220083236,BPNet arch. 6M,3.0,human,start codon
|
| 246 |
-
0.3876807987689972,Residual CNN 44M,15.0,human,start codon
|
| 247 |
-
0.1003016158938407,HyenaDNA 7M,18.0,human,start codon
|
| 248 |
-
0.3958532512187958,Caduceus 7M,24.0,human,start codon
|
| 249 |
-
0.1399599611759185,Evo2 1B,34.0,human,start codon
|
| 250 |
-
0.540923535823822,NTv3 8M (pre),1.0,human,start codon
|
| 251 |
-
0.5464004278182983,NTv3 100M (pre),2.0,human,start codon
|
| 252 |
-
0.6803378462791443,NTv3 650M (pre),5.0,human,start codon
|
| 253 |
-
0.7310947179794312,NTv3 650M (pos),7.0,human,start codon
|
| 254 |
-
0.0814515128731727,NTv2 500M,72.0,human,intron
|
| 255 |
-
0.0172978900372982,BPNet arch. 6M,5.0,human,intron
|
| 256 |
-
0.2740728259086609,Residual CNN 44M,19.0,human,intron
|
| 257 |
-
0.3312098085880279,HyenaDNA 7M,21.0,human,intron
|
| 258 |
-
0.5108950138092041,Caduceus 7M,33.0,human,intron
|
| 259 |
-
0.5034915208816528,NTv3 8M (pre),1.0,human,intron
|
| 260 |
-
0.5154411792755127,NTv3 100M (pre),2.0,human,intron
|
| 261 |
-
0.5814740061759949,NTv3 650M (pre),6.0,human,intron
|
| 262 |
-
0.5920455455780029,NTv3 650M (pos),8.0,human,intron
|
| 263 |
-
0.3505669236183166,NTv2 500M,72.0,human,exon
|
| 264 |
-
0.2252149283885955,BPNet arch. 6M,5.0,human,exon
|
| 265 |
-
0.4010578095912933,Residual CNN 44M,18.0,human,exon
|
| 266 |
-
0.1851459741592407,HyenaDNA 7M,18.0,human,exon
|
| 267 |
-
0.4599409103393554,Caduceus 7M,33.0,human,exon
|
| 268 |
-
0.5931490063667297,NTv3 8M (pre),1.0,human,exon
|
| 269 |
-
0.6058318018913269,NTv3 100M (pre),2.0,human,exon
|
| 270 |
-
0.6738048791885376,NTv3 650M (pre),6.0,human,exon
|
| 271 |
-
0.6936564445495605,NTv3 650M (pos),8.0,human,exon
|
| 272 |
-
0.1533636748790741,NTv2 500M,72.0,human,splice acceptor
|
| 273 |
-
0.3751010596752167,BPNet arch. 6M,5.0,human,splice acceptor
|
| 274 |
-
0.681228756904602,Residual CNN 44M,19.0,human,splice acceptor
|
| 275 |
-
0.0252278540283441,HyenaDNA 7M,22.0,human,splice acceptor
|
| 276 |
-
0.7485092878341675,Caduceus 7M,35.0,human,splice acceptor
|
| 277 |
-
0.7772909998893738,NTv3 8M (pre),1.0,human,splice acceptor
|
| 278 |
-
0.794090747833252,NTv3 100M (pre),2.0,human,splice acceptor
|
| 279 |
-
0.8239933252334595,NTv3 650M (pre),6.0,human,splice acceptor
|
| 280 |
-
0.804115891456604,NTv3 650M (pos),8.0,human,splice acceptor
|
| 281 |
-
0.0851806029677391,NTv2 500M,72.0,human,start codon
|
| 282 |
-
0.0,BPNet arch. 6M,5.0,human,start codon
|
| 283 |
-
0.3292546272277832,Residual CNN 44M,19.0,human,start codon
|
| 284 |
-
0.0647941380739212,HyenaDNA 7M,20.0,human,start codon
|
| 285 |
-
0.4505241215229034,Caduceus 7M,33.0,human,start codon
|
| 286 |
-
0.60422682762146,NTv3 8M (pre),1.0,human,start codon
|
| 287 |
-
0.6015576124191284,NTv3 100M (pre),2.0,human,start codon
|
| 288 |
-
0.6452956795692444,NTv3 650M (pre),6.0,human,start codon
|
| 289 |
-
0.6761345267295837,NTv3 650M (pos),8.0,human,start codon
|
| 290 |
-
0.0558800511062145,NTv2 500M,70.0,human,intron
|
| 291 |
-
0.0185965970158576,BPNet arch. 6M,5.0,human,intron
|
| 292 |
-
0.2623045742511749,Residual CNN 44M,18.0,human,intron
|
| 293 |
-
0.3633092641830444,HyenaDNA 7M,68.0,human,intron
|
| 294 |
-
0.4261827170848846,Caduceus 7M,29.0,human,intron
|
| 295 |
-
0.4804849028587341,NTv3 8M (pre),1.0,human,intron
|
| 296 |
-
0.482195496559143,NTv3 100M (pre),2.0,human,intron
|
| 297 |
-
0.5425574779510498,NTv3 650M (pre),6.0,human,intron
|
| 298 |
-
0.5443048477172852,NTv3 650M (pos),8.0,human,intron
|
| 299 |
-
0.3958893716335296,NTv2 500M,71.0,human,exon
|
| 300 |
-
0.2360571771860122,BPNet arch. 6M,5.0,human,exon
|
| 301 |
-
0.3744256496429443,Residual CNN 44M,18.0,human,exon
|
| 302 |
-
0.1936572045087814,HyenaDNA 7M,68.0,human,exon
|
| 303 |
-
0.5046994090080261,Caduceus 7M,29.0,human,exon
|
| 304 |
-
0.6339762210845947,NTv3 8M (pre),1.0,human,exon
|
| 305 |
-
0.6433913111686707,NTv3 100M (pre),2.0,human,exon
|
| 306 |
-
0.6518793702125549,NTv3 650M (pre),6.0,human,exon
|
| 307 |
-
0.6812491416931152,NTv3 650M (pos),8.0,human,exon
|
| 308 |
-
0.1248077526688575,NTv2 500M,70.0,human,splice acceptor
|
| 309 |
-
0.3842235207557678,BPNet arch. 6M,5.0,human,splice acceptor
|
| 310 |
-
0.6810190081596375,Residual CNN 44M,18.0,human,splice acceptor
|
| 311 |
-
0.0527583621442317,HyenaDNA 7M,17.0,human,splice acceptor
|
| 312 |
-
0.7072214484214783,Caduceus 7M,29.0,human,splice acceptor
|
| 313 |
-
0.7796080708503723,NTv3 8M (pre),1.0,human,splice acceptor
|
| 314 |
-
0.7596970200538635,NTv3 100M (pre),2.0,human,splice acceptor
|
| 315 |
-
0.7915040850639343,NTv3 650M (pre),6.0,human,splice acceptor
|
| 316 |
-
0.7957100868225098,NTv3 650M (pos),8.0,human,splice acceptor
|
| 317 |
-
0.1267423331737518,NTv2 500M,70.0,human,start codon
|
| 318 |
-
0.1114460304379463,BPNet arch. 6M,5.0,human,start codon
|
| 319 |
-
0.3342535495758056,Residual CNN 44M,18.0,human,start codon
|
| 320 |
-
0.1215013489127159,HyenaDNA 7M,18.0,human,start codon
|
| 321 |
-
0.4082835018634796,Caduceus 7M,29.0,human,start codon
|
| 322 |
-
0.5167152881622314,NTv3 8M (pre),1.0,human,start codon
|
| 323 |
-
0.5340564250946045,NTv3 100M (pre),2.0,human,start codon
|
| 324 |
-
0.6148532032966614,NTv3 650M (pre),6.0,human,start codon
|
| 325 |
-
0.6582212448120117,NTv3 650M (pos),8.0,human,start codon
|
| 326 |
-
0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
|
| 327 |
-
0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
|
| 328 |
-
0.6582212448120117,Caduceus 7M,8.0,human,start codon
|
| 329 |
-
0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
|
| 330 |
-
0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
|
| 331 |
-
0.6582212448120117,Caduceus 7M,8.0,human,start codon
|
| 332 |
-
0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
|
| 333 |
-
0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
|
| 334 |
-
0.6582212448120117,Caduceus 7M,8.0,human,start codon
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/bigwig_dataset.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/ntv3_benchmark_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/streamlit_app.py
CHANGED
|
@@ -82,6 +82,7 @@ ASSAY_COLORS = {
|
|
| 82 |
"splice acceptor": '#ff9900',
|
| 83 |
"start codon": '#9933cc',
|
| 84 |
}
|
|
|
|
| 85 |
|
| 86 |
MODEL_COLORS = {
|
| 87 |
"NTv3 650M (pos)": COLORS['blue_0'],
|
|
@@ -93,7 +94,8 @@ MODEL_COLORS = {
|
|
| 93 |
"BPNet arch. 6M": COLORS['cyan_1'],
|
| 94 |
"Residual CNN 44M": COLORS['magenta_1'],
|
| 95 |
"PlantCAD2 88M": COLORS["purple_1"],
|
| 96 |
-
"Caduceus 7M": COLORS["purple_2"]
|
|
|
|
| 97 |
}
|
| 98 |
|
| 99 |
MODEL_TRAINING_STATUS = {
|
|
@@ -107,6 +109,7 @@ MODEL_TRAINING_STATUS = {
|
|
| 107 |
"NTv2 500M": "PRE",
|
| 108 |
"BPNet arch. 6M": "SCRATCH",
|
| 109 |
"PlantCAD2 88M": "PRE",
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
MODEL_GPU_MULTIPLIER = {
|
|
@@ -152,8 +155,7 @@ HERE = os.path.dirname(os.path.abspath(__file__)) # /app/src
|
|
| 152 |
PROJECT_ROOT = os.path.dirname(HERE) # /app
|
| 153 |
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
|
| 154 |
|
| 155 |
-
|
| 156 |
-
MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
|
| 157 |
|
| 158 |
# ---------------------------------------------------------------------
|
| 159 |
# Data loading & preprocessing
|
|
@@ -162,122 +164,108 @@ MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
|
|
| 162 |
|
| 163 |
@st.cache_data
|
| 164 |
def load_raw_data():
|
| 165 |
-
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
-
pearson_df.columns = [c.strip() for c in pearson_df.columns]
|
| 169 |
-
mcc_df.columns = [c.strip() for c in mcc_df.columns]
|
| 170 |
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
-
|
| 174 |
-
return df.rename(columns={"running_time_hours": "GPU hours"})
|
| 175 |
|
| 176 |
|
| 177 |
-
|
| 178 |
-
def load_expanded_data():
|
| 179 |
"""
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
and convert into a unified schema:
|
| 183 |
-
species, assay_type?, datasets, Model, Score
|
| 184 |
-
|
| 185 |
-
For Pearson:
|
| 186 |
-
If multiple rows share (species, assay_type, datasets, Model),
|
| 187 |
-
we average their Score.
|
| 188 |
-
|
| 189 |
-
For MCC:
|
| 190 |
-
If multiple rows share (species, datasets, Model),
|
| 191 |
-
we average their Score.
|
| 192 |
"""
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
mcc_df = _normalize_training_hours(mcc_df)
|
| 196 |
-
|
| 197 |
-
#if "track_name_clean" in pearson_df.columns:
|
| 198 |
-
# pearson_df = pearson_df.drop(columns=["datasets"], errors="ignore")
|
| 199 |
-
# pearson_df = pearson_df.rename(columns={"track_name_clean": "datasets"})
|
| 200 |
-
|
| 201 |
-
# --- Pearson correlations ---
|
| 202 |
-
# Expect columns: species, assay_type, datasets, model_name, pearson correlation
|
| 203 |
-
pearson_df = pearson_df.rename(
|
| 204 |
-
columns={
|
| 205 |
-
"model_name": "Model",
|
| 206 |
-
"pearson correlation": "Score",
|
| 207 |
-
}
|
| 208 |
-
)
|
| 209 |
|
| 210 |
-
# --- Keep track_name_clean available (for head-to-head only later) ---
|
| 211 |
-
pearson_track_map = None
|
| 212 |
-
if "track_name_clean" in pearson_df.columns:
|
| 213 |
-
map_keys = ["species", "datasets"]
|
| 214 |
-
if "assay_type" in pearson_df.columns:
|
| 215 |
-
map_keys.append("assay_type")
|
| 216 |
|
| 217 |
-
pearson_track_map = (
|
| 218 |
-
pearson_df[map_keys + ["track_name_clean"]]
|
| 219 |
-
.dropna(subset=["track_name_clean"])
|
| 220 |
-
.drop_duplicates()
|
| 221 |
-
)
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
|
|
|
|
|
|
|
|
|
| 224 |
pearson_group_cols = ["species", "datasets", "Model"]
|
| 225 |
-
if "assay_type" in
|
| 226 |
pearson_group_cols.append("assay_type")
|
| 227 |
|
| 228 |
-
agg_cols = {"Score": "mean"}
|
| 229 |
-
if "GPU hours" in pearson_df.columns:
|
| 230 |
-
agg_cols["GPU hours"] = "mean"
|
| 231 |
-
|
| 232 |
-
# --- after aggregation ---
|
| 233 |
pearson_df = (
|
| 234 |
-
|
| 235 |
.groupby(pearson_group_cols, as_index=False, dropna=False)
|
| 236 |
-
.agg(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
)
|
| 238 |
|
| 239 |
-
# ✅
|
| 240 |
-
if
|
| 241 |
-
|
|
|
|
|
|
|
| 242 |
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
if "assay_type" in pearson_df.columns:
|
| 245 |
pearson_df["assay_type"] = (
|
| 246 |
-
pearson_df["assay_type"]
|
| 247 |
-
.map(ASSAY_TYPE_MAPPING)
|
| 248 |
-
.fillna("Other")
|
| 249 |
)
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
#
|
| 254 |
-
# Expect columns: species, datasets, model_name, MCC
|
| 255 |
-
mcc_df = mcc_df.rename(
|
| 256 |
-
columns={
|
| 257 |
-
"model_name": "Model",
|
| 258 |
-
"MCC": "Score",
|
| 259 |
-
}
|
| 260 |
-
)
|
| 261 |
-
|
| 262 |
-
# Collapse duplicates with same (species, datasets, Model)
|
| 263 |
-
mcc_group_cols = ["species", "datasets", "Model"]
|
| 264 |
-
agg_cols = {"Score": "mean"}
|
| 265 |
-
if "GPU hours" in mcc_df.columns:
|
| 266 |
-
agg_cols["GPU hours"] = "mean"
|
| 267 |
-
|
| 268 |
mcc_df = (
|
| 269 |
-
|
| 270 |
-
.groupby(
|
| 271 |
-
.agg(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
)
|
| 273 |
|
| 274 |
-
# Optional sanity checks
|
| 275 |
-
for df_name, df in [("pearson", pearson_df), ("mcc", mcc_df)]:
|
| 276 |
-
required = {"species", "datasets", "Model", "Score"}
|
| 277 |
-
missing = required - set(df.columns)
|
| 278 |
-
if missing:
|
| 279 |
-
st.error(f"{df_name} dataframe missing columns: {missing}")
|
| 280 |
-
|
| 281 |
return pearson_df, mcc_df
|
| 282 |
|
| 283 |
|
|
@@ -510,45 +498,6 @@ def plot_breakdown_facets_sorted_models(
|
|
| 510 |
|
| 511 |
return fig
|
| 512 |
|
| 513 |
-
|
| 514 |
-
def build_radar_df(
|
| 515 |
-
benchmark_name: str,
|
| 516 |
-
selected_species: List[str],
|
| 517 |
-
selected_assays: List[str],
|
| 518 |
-
selected_models: List[str],
|
| 519 |
-
selected_datasets: List[str],
|
| 520 |
-
) -> pd.DataFrame:
|
| 521 |
-
cfg = _BENCHMARKS[benchmark_name]
|
| 522 |
-
|
| 523 |
-
df = filter_base_df(
|
| 524 |
-
benchmark_name,
|
| 525 |
-
selected_species,
|
| 526 |
-
selected_assays,
|
| 527 |
-
selected_models,
|
| 528 |
-
selected_datasets,
|
| 529 |
-
)
|
| 530 |
-
|
| 531 |
-
if df.empty:
|
| 532 |
-
return pd.DataFrame()
|
| 533 |
-
|
| 534 |
-
# Choose axis column
|
| 535 |
-
if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
|
| 536 |
-
axis_col = "assay_type"
|
| 537 |
-
axis_label = "Assay type"
|
| 538 |
-
else:
|
| 539 |
-
axis_col = "datasets"
|
| 540 |
-
axis_label = "Dataset"
|
| 541 |
-
|
| 542 |
-
radar_df = (
|
| 543 |
-
df.groupby([axis_col, "Model"], as_index=False)["Score"]
|
| 544 |
-
.mean()
|
| 545 |
-
.rename(columns={axis_col: "Axis", "Score": "Value"})
|
| 546 |
-
)
|
| 547 |
-
|
| 548 |
-
radar_df.attrs["axis_label"] = axis_label
|
| 549 |
-
return radar_df
|
| 550 |
-
|
| 551 |
-
|
| 552 |
def build_pairwise_scatter_df(
|
| 553 |
benchmark_name: str,
|
| 554 |
selected_species: List[str],
|
|
@@ -558,19 +507,8 @@ def build_pairwise_scatter_df(
|
|
| 558 |
model_a: str,
|
| 559 |
model_b: str,
|
| 560 |
) -> pd.DataFrame:
|
| 561 |
-
"""
|
| 562 |
-
Returns a per-track dataframe with columns:
|
| 563 |
-
Track, Model A, Model B, (optional) species, (optional) assay_type, datasets
|
| 564 |
-
Where each row corresponds to a specific track (datasets [+ assay_type]).
|
| 565 |
-
|
| 566 |
-
Special case:
|
| 567 |
-
If `track_name_clean` exists (typically for bigwig Functional Tracks),
|
| 568 |
-
we use it ONLY for the head-to-head "Track" label (and track identity),
|
| 569 |
-
while keeping the rest of the app using `datasets`.
|
| 570 |
-
"""
|
| 571 |
cfg = _BENCHMARKS[benchmark_name]
|
| 572 |
|
| 573 |
-
# Ensure chosen models are included even if toggles exclude them
|
| 574 |
models_for_filter = (
|
| 575 |
list(set(selected_models + [model_a, model_b]))
|
| 576 |
if selected_models else [model_a, model_b]
|
|
@@ -583,24 +521,18 @@ def build_pairwise_scatter_df(
|
|
| 583 |
models_for_filter,
|
| 584 |
selected_datasets,
|
| 585 |
)
|
| 586 |
-
|
| 587 |
if df.empty:
|
| 588 |
return pd.DataFrame()
|
| 589 |
|
| 590 |
-
#
|
| 591 |
-
#
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
# Define what “a specific track” means
|
| 595 |
-
track_cols = [track_id_col]
|
| 596 |
if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
|
| 597 |
-
track_cols = ["assay_type",
|
| 598 |
|
| 599 |
-
# Keep species in hover if multiple are selected
|
| 600 |
keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
|
| 601 |
id_cols = (["species"] if keep_species else []) + track_cols
|
| 602 |
|
| 603 |
-
# Pivot into two model columns
|
| 604 |
wide = (
|
| 605 |
df[df["Model"].isin([model_a, model_b])]
|
| 606 |
.pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
|
|
@@ -612,26 +544,28 @@ def build_pairwise_scatter_df(
|
|
| 612 |
|
| 613 |
wide = wide.dropna(subset=[model_a, model_b])
|
| 614 |
|
| 615 |
-
#
|
| 616 |
if "assay_type" in wide.columns:
|
| 617 |
-
wide["Track"] = wide["assay_type"].astype(str) + " / " + wide[
|
| 618 |
else:
|
| 619 |
-
wide["Track"] = wide[
|
| 620 |
|
| 621 |
-
# Rename for plotting
|
| 622 |
wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
|
| 623 |
|
| 624 |
-
#
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
|
|
|
|
|
|
| 631 |
|
| 632 |
return wide
|
| 633 |
|
| 634 |
|
|
|
|
| 635 |
def build_violin_df(
|
| 636 |
benchmark_name: str,
|
| 637 |
selected_species: List[str],
|
|
@@ -655,75 +589,15 @@ def build_violin_df(
|
|
| 655 |
return df[keep].copy()
|
| 656 |
|
| 657 |
|
| 658 |
-
def plot_radar(
    radar_df: pd.DataFrame,
    metric_label: str,
    height: int = 600,
):
    """Draw one filled polar trace per model from a long-format radar table.

    Args:
        radar_df: Table read through its ``Axis``, ``Model`` and ``Value``
            columns.
        metric_label: Title placed on the radial axis.
        height: Figure height forwarded to the layout.

    Returns:
        The Plotly figure, or ``None`` when ``radar_df`` is empty.
    """
    if radar_df.empty:
        return None

    transparent = "rgba(0,0,0,0)"
    grid_color = "rgba(0,0,0,0.15)"

    axis_order = radar_df["Axis"].unique().tolist()

    # A single radial range shared by all traces, padded by 5% of the value
    # span (or 5% of 1.0 when every value is identical).
    lowest = radar_df["Value"].min()
    highest = radar_df["Value"].max()
    span = highest - lowest if highest > lowest else 1.0
    pad_amount = 0.05 * span
    radial_range = [lowest - pad_amount, highest + pad_amount]

    fig = go.Figure()

    for model_name in radar_df["Model"].unique():
        rows = radar_df[radar_df["Model"] == model_name]
        # Re-index so each model lists its values in the same axis order.
        ordered = rows.set_index("Axis").reindex(axis_order)
        trace = go.Scatterpolar(
            r=ordered["Value"],
            theta=axis_order,
            fill="toself",
            name=model_name,
            line_color=MODEL_COLORS.get(model_name),
            opacity=0.75,
        )
        fig.add_trace(trace)

    radial_axis = {
        "title": metric_label,
        "range": radial_range,
        "tickformat": ".2f",
        "showgrid": True,
        "gridcolor": grid_color,
    }
    angular_axis = {
        "showgrid": True,
        "gridcolor": grid_color,
    }

    # Polar area, plot area and paper are all made transparent.
    fig.update_layout(
        height=height,
        polar={
            "bgcolor": transparent,
            "radialaxis": radial_axis,
            "angularaxis": angular_axis,
        },
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
        showlegend=True,
        legend_title_text="Model",
        margin={"t": 40, "b": 40, "l": 40, "r": 40},
    )

    return fig
|
| 718 |
-
|
| 719 |
-
|
| 720 |
def build_convergence_df(
|
| 721 |
benchmark_name: str,
|
| 722 |
selected_species: List[str],
|
| 723 |
selected_assays: List[str],
|
| 724 |
selected_models: List[str],
|
| 725 |
selected_datasets: List[str],
|
|
|
|
| 726 |
) -> pd.DataFrame:
|
|
|
|
| 727 |
df = filter_base_df(
|
| 728 |
benchmark_name,
|
| 729 |
selected_species,
|
|
@@ -732,25 +606,58 @@ def build_convergence_df(
|
|
| 732 |
selected_datasets,
|
| 733 |
)
|
| 734 |
|
| 735 |
-
if df.empty
|
| 736 |
-
return pd.DataFrame(columns=["Model", "
|
| 737 |
|
|
|
|
| 738 |
out = (
|
| 739 |
df.groupby("Model", as_index=False)
|
| 740 |
-
.agg({"Score": "mean"
|
| 741 |
.rename(columns={"Score": "Performance"})
|
| 742 |
)
|
| 743 |
|
| 744 |
-
#
|
| 745 |
-
|
| 746 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 747 |
|
| 748 |
-
out = out.dropna(subset=["GPU hours", "Performance"])
|
| 749 |
out["Performance"] = out["Performance"].round(3)
|
| 750 |
-
out["GPU hours"] = out["GPU hours"].round(1)
|
| 751 |
|
| 752 |
return out
|
| 753 |
|
|
|
|
| 754 |
# ---------------------------------------------------------------------
|
| 755 |
# UI helpers
|
| 756 |
# ---------------------------------------------------------------------
|
|
@@ -893,7 +800,7 @@ def main():
|
|
| 893 |
)
|
| 894 |
fig.update_layout(
|
| 895 |
barmode="group",
|
| 896 |
-
height=
|
| 897 |
xaxis_title="",
|
| 898 |
yaxis_title=cfg["metric_label"],
|
| 899 |
plot_bgcolor="rgba(0,0,0,0)",
|
|
@@ -970,11 +877,9 @@ def main():
|
|
| 970 |
pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
|
| 971 |
axis_range = [min_v - pad, max_v + pad]
|
| 972 |
tick_step = (axis_range[1] - axis_range[0]) / 5
|
| 973 |
-
|
| 974 |
-
hover_cols = []
|
| 975 |
-
|
| 976 |
-
# Prefer track_name_clean; fall back to Track if not present
|
| 977 |
-
if "track_name_clean" in scatter_df.columns:
|
| 978 |
hover_cols.append("track_name_clean")
|
| 979 |
else:
|
| 980 |
hover_cols.append("datasets")
|
|
@@ -1038,49 +943,57 @@ def main():
|
|
| 1038 |
with right:
|
| 1039 |
st.markdown("#### ⏱️ Time to convergence")
|
| 1040 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
conv_df = build_convergence_df(
|
| 1042 |
benchmark_name,
|
| 1043 |
selected_species,
|
| 1044 |
selected_assays,
|
| 1045 |
selected_models,
|
| 1046 |
selected_datasets,
|
|
|
|
| 1047 |
)
|
| 1048 |
|
| 1049 |
if conv_df.empty:
|
| 1050 |
-
st.info("No
|
| 1051 |
else:
|
| 1052 |
fig_conv = px.scatter(
|
| 1053 |
conv_df,
|
| 1054 |
-
x="
|
| 1055 |
y="Performance",
|
| 1056 |
text="Model",
|
| 1057 |
-
color="Model",
|
| 1058 |
-
color_discrete_map=MODEL_COLORS,
|
| 1059 |
-
hover_data=["Model", "
|
| 1060 |
)
|
| 1061 |
-
|
| 1062 |
-
fig_conv.update_traces(textposition="top center")
|
| 1063 |
fig_conv.update_layout(
|
| 1064 |
-
height=
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
plot_bgcolor="rgba(0,0,0,0)",
|
| 1068 |
paper_bgcolor="rgba(0,0,0,0)",
|
| 1069 |
-
showlegend=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1070 |
)
|
| 1071 |
|
| 1072 |
-
|
|
|
|
|
|
|
| 1073 |
type="log",
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
)
|
| 1077 |
-
|
| 1078 |
-
# optional: hide legend if labels already on points
|
| 1079 |
-
# fig_conv.update_layout(showlegend=False)
|
| 1080 |
-
|
| 1081 |
st.plotly_chart(fig_conv, use_container_width=True)
|
| 1082 |
|
| 1083 |
-
|
| 1084 |
# ------------------------------------------------------------------
|
| 1085 |
# Violin (full width, below)
|
| 1086 |
# ------------------------------------------------------------------
|
|
|
|
| 82 |
"splice acceptor": '#ff9900',
|
| 83 |
"start codon": '#9933cc',
|
| 84 |
}
|
| 85 |
+
ASSAY_COLORS["Other"] = "#808080"
|
| 86 |
|
| 87 |
MODEL_COLORS = {
|
| 88 |
"NTv3 650M (pos)": COLORS['blue_0'],
|
|
|
|
| 94 |
"BPNet arch. 6M": COLORS['cyan_1'],
|
| 95 |
"Residual CNN 44M": COLORS['magenta_1'],
|
| 96 |
"PlantCAD2 88M": COLORS["purple_1"],
|
| 97 |
+
"Caduceus 7M": COLORS["purple_2"],
|
| 98 |
+
"HyenaDNA 7M": COLORS["yellow_2"]
|
| 99 |
}
|
| 100 |
|
| 101 |
MODEL_TRAINING_STATUS = {
|
|
|
|
| 109 |
"NTv2 500M": "PRE",
|
| 110 |
"BPNet arch. 6M": "SCRATCH",
|
| 111 |
"PlantCAD2 88M": "PRE",
|
| 112 |
+
"HyenaDNA 7M": "PRE"
|
| 113 |
}
|
| 114 |
|
| 115 |
MODEL_GPU_MULTIPLIER = {
|
|
|
|
| 155 |
PROJECT_ROOT = os.path.dirname(HERE) # /app
|
| 156 |
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
|
| 157 |
|
| 158 |
+
SINGLE_TABLE_PATH = os.path.join(DATA_DIR, "ntv3_benchmark_results.csv")
|
|
|
|
| 159 |
|
| 160 |
# ---------------------------------------------------------------------
|
| 161 |
# Data loading & preprocessing
|
|
|
|
| 164 |
|
| 165 |
@st.cache_data
def load_raw_data():
    """Read the single benchmark results table from ``SINGLE_TABLE_PATH``.

    Leading and trailing whitespace is stripped from every column name
    before the frame is returned. The call is wrapped in ``st.cache_data``.
    """
    table = pd.read_csv(SINGLE_TABLE_PATH)
    cleaned_names = [name.strip() for name in table.columns]
    table.columns = cleaned_names
    return table
|
| 170 |
|
|
|
|
|
|
|
| 171 |
|
| 172 |
+
def _normalize_training_time_to_gpu_hours(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``GPU hours`` column derived from ``running_time``.

    ``running_time`` is coerced to numbers (unparseable entries become NaN).
    When the median of the valid values exceeds 10,000 the column is assumed
    to hold seconds and is divided by 3600; otherwise the values are taken to
    be hours already. For scale, 88 hours is 316,800 seconds.

    The frame is modified in place and also returned. A frame without a
    ``running_time`` column is returned untouched.
    """
    if "running_time" in df.columns:
        elapsed = pd.to_numeric(df["running_time"], errors="coerce")
        # An empty or all-NaN column has a NaN median, which compares False,
        # so such input falls through to the "already hours" branch.
        looks_like_seconds = elapsed.dropna().median() > 10_000
        if looks_like_seconds:
            df["GPU hours"] = elapsed / 3600.0
        else:
            df["GPU hours"] = elapsed.astype(float)
    return df
|
|
|
|
| 189 |
|
| 190 |
|
| 191 |
+
def _best_step_time_to_hours(s: pd.Series) -> pd.Series:
    """Convert duration values to a float number of hours.

    Values such as ``'3 days 04:26:26.467000'`` are parsed with
    ``pd.to_timedelta``; entries that cannot be parsed become NaN.
    """
    seconds_per_hour = 3600.0
    durations = pd.to_timedelta(s, errors="coerce")
    elapsed_seconds = durations.dt.total_seconds()
    return elapsed_seconds / seconds_per_hour
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
@st.cache_data
def load_expanded_data():
    """Split the raw results table into two aggregated benchmark frames.

    The raw table from ``load_raw_data`` is renamed (``Metric`` -> ``Score``,
    ``model_name`` -> ``Model``). Rows whose ``assay_type`` equals
    ``"Annotation"`` go to the MCC frame; all other rows go to the Pearson
    frame. Each frame is averaged per (species, datasets, Model[, assay_type])
    over ``Score``, ``best_step`` and ``best_step_time_hours``.

    Returns:
        ``(pearson_df, mcc_df)``. ``pearson_df`` additionally carries
        ``track_name_clean`` when the raw table has it, and its ``assay_type``
        values are mapped through ``ASSAY_TYPE_MAPPING`` (unmapped values
        become ``"Other"``).
    """
    df = load_raw_data().copy()

    df = df.rename(columns={"Metric": "Score", "model_name": "Model"})
    df["Score"] = pd.to_numeric(df["Score"], errors="coerce")

    # Both convergence columns are aggregated unconditionally below, so each
    # one gets a NaN placeholder when the raw table does not provide it.
    if "best_step" in df.columns:
        df["best_step"] = pd.to_numeric(df["best_step"], errors="coerce")
    else:
        df["best_step"] = np.nan

    if "best_step_time" in df.columns:
        df["best_step_time_hours"] = _best_step_time_to_hours(df["best_step_time"])
    else:
        df["best_step_time_hours"] = np.nan

    # Without an assay_type column no row can be an annotation row.
    has_assay_type = "assay_type" in df.columns
    if has_assay_type:
        is_annot = df["assay_type"].astype(str).eq("Annotation")
    else:
        is_annot = pd.Series(False, index=df.index)
    pearson_raw = df[~is_annot].copy()
    mcc_raw = df[is_annot].copy()

    agg_spec = {
        "Score": "mean",
        "best_step": "mean",
        "best_step_time_hours": "mean",
    }

    # -------------------------
    # Functional Tracks (Pearson)
    # -------------------------
    pearson_group_cols = ["species", "datasets", "Model"]
    if has_assay_type:
        pearson_group_cols.append("assay_type")

    pearson_df = (
        pearson_raw
        .groupby(pearson_group_cols, as_index=False, dropna=False)
        .agg(agg_spec)
    )

    # Attach track_name_clean while assay_type still holds its raw values,
    # because the merge keys come from the raw table.
    if "track_name_clean" in pearson_raw.columns:
        map_keys = ["species", "datasets"]
        if has_assay_type:
            map_keys.append("assay_type")

        # Keep one name per key so the left merge cannot multiply rows.
        track_map = (
            pearson_raw[map_keys + ["track_name_clean"]]
            .dropna(subset=["track_name_clean"])
            .drop_duplicates(subset=map_keys)
        )
        pearson_df = pearson_df.merge(track_map, on=map_keys, how="left")

    # Only now collapse raw assay types into display categories.
    if "assay_type" in pearson_df.columns:
        pearson_df["assay_type"] = (
            pearson_df["assay_type"].astype(str).map(ASSAY_TYPE_MAPPING).fillna("Other")
        )

    # -------------------------
    # Genome Annotation (MCC)
    # -------------------------
    mcc_df = (
        mcc_raw
        .groupby(["species", "datasets", "Model"], as_index=False, dropna=False)
        .agg(agg_spec)
    )

    return pearson_df, mcc_df
|
| 270 |
|
| 271 |
|
|
|
|
| 498 |
|
| 499 |
return fig
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
def build_pairwise_scatter_df(
|
| 502 |
benchmark_name: str,
|
| 503 |
selected_species: List[str],
|
|
|
|
| 507 |
model_a: str,
|
| 508 |
model_b: str,
|
| 509 |
) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
cfg = _BENCHMARKS[benchmark_name]
|
| 511 |
|
|
|
|
| 512 |
models_for_filter = (
|
| 513 |
list(set(selected_models + [model_a, model_b]))
|
| 514 |
if selected_models else [model_a, model_b]
|
|
|
|
| 521 |
models_for_filter,
|
| 522 |
selected_datasets,
|
| 523 |
)
|
|
|
|
| 524 |
if df.empty:
|
| 525 |
return pd.DataFrame()
|
| 526 |
|
| 527 |
+
# ---- define "track identity" for head-to-head ----
|
| 528 |
+
# Always use datasets for the identity (x/y points)
|
| 529 |
+
track_cols = ["datasets"]
|
|
|
|
|
|
|
|
|
|
| 530 |
if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
|
| 531 |
+
track_cols = ["assay_type", "datasets"]
|
| 532 |
|
|
|
|
| 533 |
keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
|
| 534 |
id_cols = (["species"] if keep_species else []) + track_cols
|
| 535 |
|
|
|
|
| 536 |
wide = (
|
| 537 |
df[df["Model"].isin([model_a, model_b])]
|
| 538 |
.pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
|
|
|
|
| 544 |
|
| 545 |
wide = wide.dropna(subset=[model_a, model_b])
|
| 546 |
|
| 547 |
+
# Nice display label: use datasets (not track_name_clean)
|
| 548 |
if "assay_type" in wide.columns:
|
| 549 |
+
wide["Track"] = wide["assay_type"].astype(str) + " / " + wide["datasets"].astype(str)
|
| 550 |
else:
|
| 551 |
+
wide["Track"] = wide["datasets"].astype(str)
|
| 552 |
|
|
|
|
| 553 |
wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
|
| 554 |
|
| 555 |
+
# ---- Pearson-only: merge track_name_clean for hover ----
|
| 556 |
+
if benchmark_name == "Functional Tracks" and "track_name_clean" in df.columns:
|
| 557 |
+
merge_keys = id_cols.copy() # species? + assay_type? + datasets
|
| 558 |
+
track_map = (
|
| 559 |
+
df[merge_keys + ["track_name_clean"]]
|
| 560 |
+
.dropna(subset=["track_name_clean"])
|
| 561 |
+
.drop_duplicates()
|
| 562 |
+
)
|
| 563 |
+
wide = wide.merge(track_map, on=merge_keys, how="left")
|
| 564 |
|
| 565 |
return wide
|
| 566 |
|
| 567 |
|
| 568 |
+
|
| 569 |
def build_violin_df(
|
| 570 |
benchmark_name: str,
|
| 571 |
selected_species: List[str],
|
|
|
|
| 589 |
return df[keep].copy()
|
| 590 |
|
| 591 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
def build_convergence_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
    x_mode: str = "best_step",  # "best_step"/"Steps" -> steps | anything else -> GPU hours
) -> pd.DataFrame:
    """Build the per-model table behind the convergence scatter plot.

    Args:
        benchmark_name: Benchmark forwarded to ``filter_base_df``.
        selected_species: Species filter forwarded to ``filter_base_df``.
        selected_assays: Assay filter forwarded to ``filter_base_df``.
        selected_models: Model filter forwarded to ``filter_base_df``.
        selected_datasets: Dataset filter forwarded to ``filter_base_df``.
        x_mode: ``"Steps"`` (the UI label) or ``"best_step"`` (the default)
            puts the mean ``best_step`` on the X axis. Any other value, such
            as ``"GPU hours"``, uses the mean ``best_step_time_hours`` scaled
            by the per-model GPU count.

    Returns:
        A frame with ``Model``, ``Performance`` (mean ``Score`` rounded to 3
        decimals) and ``X``. Rows lacking either value are dropped. An empty
        frame with those columns is returned when nothing matches the filters
        or the required X column is missing.
    """
    empty = pd.DataFrame(columns=["Model", "X", "Performance"])

    df = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )

    if df.empty:
        return empty

    # Mean performance per model
    out = (
        df.groupby("Model", as_index=False)
        .agg({"Score": "mean"})
        .rename(columns={"Score": "Performance"})
    )

    # -------------------------
    # X axis selection
    # -------------------------
    # The documented default ("best_step") and the UI label ("Steps") both
    # select the step count. Previously only "Steps" matched, so the default
    # silently produced GPU hours.
    use_steps = x_mode in ("Steps", "best_step")
    source_col = "best_step" if use_steps else "best_step_time_hours"
    if source_col not in df.columns:
        return empty

    x = (
        df.groupby("Model", as_index=False)[source_col]
        .mean()
        .rename(columns={source_col: "X"})
    )

    if not use_steps:
        # Wall-clock hours become GPU hours: Evo2 uses 8 GPUs, and models
        # not listed here count as 1.
        gpu_multiplier = {
            "Evo2 1B": 8,
        }
        x["X"] = x["X"] * x["Model"].map(gpu_multiplier).fillna(1)

    # Merge + clean
    out = out.merge(x, on="Model", how="left")
    out = out.dropna(subset=["X", "Performance"])
    out = out.assign(Performance=out["Performance"].round(3))

    return out
|
| 659 |
|
| 660 |
+
|
| 661 |
# ---------------------------------------------------------------------
|
| 662 |
# UI helpers
|
| 663 |
# ---------------------------------------------------------------------
|
|
|
|
| 800 |
)
|
| 801 |
fig.update_layout(
|
| 802 |
barmode="group",
|
| 803 |
+
height=500,
|
| 804 |
xaxis_title="",
|
| 805 |
yaxis_title=cfg["metric_label"],
|
| 806 |
plot_bgcolor="rgba(0,0,0,0)",
|
|
|
|
| 877 |
pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
|
| 878 |
axis_range = [min_v - pad, max_v + pad]
|
| 879 |
tick_step = (axis_range[1] - axis_range[0]) / 5
|
| 880 |
+
|
| 881 |
+
hover_cols = ["datasets"]
|
| 882 |
+
if benchmark_name == "Functional Tracks" and "track_name_clean" in scatter_df.columns:
|
|
|
|
|
|
|
| 883 |
hover_cols.append("track_name_clean")
|
| 884 |
else:
|
| 885 |
hover_cols.append("datasets")
|
|
|
|
| 943 |
with right:
|
| 944 |
st.markdown("#### ⏱️ Time to convergence")
|
| 945 |
|
| 946 |
+
x_mode = st.selectbox(
|
| 947 |
+
"X-axis",
|
| 948 |
+
options=["GPU hours", "Steps"],
|
| 949 |
+
index=0,
|
| 950 |
+
key=f"conv_x_mode_{benchmark_name}",
|
| 951 |
+
)
|
| 952 |
+
|
| 953 |
+
|
| 954 |
conv_df = build_convergence_df(
|
| 955 |
benchmark_name,
|
| 956 |
selected_species,
|
| 957 |
selected_assays,
|
| 958 |
selected_models,
|
| 959 |
selected_datasets,
|
| 960 |
+
x_mode=x_mode,
|
| 961 |
)
|
| 962 |
|
| 963 |
if conv_df.empty:
|
| 964 |
+
st.info("No convergence data found for the selected filters / x-axis mode.")
|
| 965 |
else:
|
| 966 |
fig_conv = px.scatter(
|
| 967 |
conv_df,
|
| 968 |
+
x="X",
|
| 969 |
y="Performance",
|
| 970 |
text="Model",
|
| 971 |
+
color="Model",
|
| 972 |
+
color_discrete_map=MODEL_COLORS,
|
| 973 |
+
hover_data=["Model", "X", "Performance"],
|
| 974 |
)
|
|
|
|
|
|
|
| 975 |
fig_conv.update_layout(
|
| 976 |
+
height=550,
|
| 977 |
+
xaxis_title=("GPU hours" if x_mode == "GPU hours" else x_mode),
|
| 978 |
+
yaxis_title=cfg["metric_label"],
|
| 979 |
plot_bgcolor="rgba(0,0,0,0)",
|
| 980 |
paper_bgcolor="rgba(0,0,0,0)",
|
| 981 |
+
showlegend=False, # ✅ no legend
|
| 982 |
+
)
|
| 983 |
+
fig_conv.update_traces(
|
| 984 |
+
marker=dict(size=14), # 👈 bigger dots
|
| 985 |
+
textposition="top center",
|
| 986 |
)
|
| 987 |
|
| 988 |
+
# Log scale only makes sense for hours (and sometimes best_step)
|
| 989 |
+
if x_mode in ["GPU hours"]:
|
| 990 |
+
fig_conv.update_xaxes(
|
| 991 |
type="log",
|
| 992 |
+
dtick=1,
|
| 993 |
+
minor=dict(ticks="", showgrid=False),
|
| 994 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
st.plotly_chart(fig_conv, use_container_width=True)
|
| 996 |
|
|
|
|
| 997 |
# ------------------------------------------------------------------
|
| 998 |
# Violin (full width, below)
|
| 999 |
# ------------------------------------------------------------------
|