Spaces:

InstaDeepAI
/

ntv3_benchmark

Running

App Files Files Community

MidAtBest committed on Dec 16, 2025

Commit

4c80381

1 Parent(s): d960a86

feat: use bernardo's dataset and fix scatterplot

Browse files

Files changed (4) hide show

data/bed_dataset.csv +0 -334
data/bigwig_dataset.csv +0 -0
data/ntv3_benchmark_results.csv +0 -0
src/streamlit_app.py +172 -259

data/bed_dataset.csv DELETED Viewed

@@ -1,334 +0,0 @@
-MCC,model_name,running_time_hours,species,datasets
-0.334637850522995,NTv2 500M,88.0,cattle,intron
-0.1238768473267555,BPNet arch. 6M,4.0,cattle,intron
-0.383470207452774,Residual CNN 44M,19.0,cattle,intron
-0.3828243613243103,HyenaDNA 7M,23.0,cattle,intron
-0.4733810424804687,Caduceus 7M,32.0,cattle,intron
-0.4315277338027954,Evo2 1B,43.0,cattle,intron
-0.5455867648124695,NTv3 8M (pre),1.0,cattle,intron
-0.5453664064407349,NTv3 100M (pre),2.0,cattle,intron
-0.5628412365913391,NTv3 650M (pre),5.0,cattle,intron
-0.5682631134986877,NTv3 650M (pos),7.0,cattle,intron
-0.3689357042312622,NTv2 500M,87.0,cattle,exon
-0.3250860869884491,BPNet arch. 6M,4.0,cattle,exon
-0.4674676060676574,Residual CNN 44M,19.0,cattle,exon
-0.2207767516374588,HyenaDNA 7M,21.0,cattle,exon
-0.4960922300815582,Caduceus 7M,32.0,cattle,exon
-0.4969632029533386,Evo2 1B,44.0,cattle,exon
-0.5432836413383484,NTv3 8M (pre),1.0,cattle,exon
-0.5531933307647705,NTv3 100M (pre),2.0,cattle,exon
-0.591151773929596,NTv3 650M (pre),5.0,cattle,exon
-0.6253225207328796,NTv3 650M (pos),7.0,cattle,exon
-0.118808165192604,NTv2 500M,86.0,cattle,splice acceptor
-0.4715546369552612,BPNet arch. 6M,4.0,cattle,splice acceptor
-0.6620649099349976,Residual CNN 44M,19.0,cattle,splice acceptor
-0.104436807334423,HyenaDNA 7M,22.0,cattle,splice acceptor
-0.7064619660377502,Caduceus 7M,30.0,cattle,splice acceptor
-0.2085049450397491,Evo2 1B,43.0,cattle,splice acceptor
-0.7254849076271057,NTv3 8M (pre),1.0,cattle,splice acceptor
-0.7404072880744934,NTv3 100M (pre),2.0,cattle,splice acceptor
-0.7732946872711182,NTv3 650M (pre),5.0,cattle,splice acceptor
-0.7679624557495117,NTv3 650M (pos),7.0,cattle,splice acceptor
-0.1412438601255417,NTv2 500M,88.0,cattle,start codon
-0.1490814685821533,BPNet arch. 6M,4.0,cattle,start codon
-0.3243320286273956,Residual CNN 44M,19.0,cattle,start codon
-0.056509330868721,HyenaDNA 7M,23.0,cattle,start codon
-0.3455557227134704,Caduceus 7M,33.0,cattle,start codon
-0.1030694246292114,Evo2 1B,43.0,cattle,start codon
-0.5275959968566895,NTv3 8M (pre),1.0,cattle,start codon
-0.4962065815925598,NTv3 100M (pre),2.0,cattle,start codon
-0.5591813921928406,NTv3 650M (pre),5.0,cattle,start codon
-0.5492052435874939,NTv3 650M (pos),7.0,cattle,start codon
-0.0383123345673084,NTv2 500M,90.0,cattle,intron
-0.1015273928642273,BPNet arch. 6M,7.0,cattle,intron
-0.3299930691719055,Residual CNN 44M,23.0,cattle,intron
-0.3826011121273041,HyenaDNA 7M,20.0,cattle,intron
-0.5564854741096497,Caduceus 7M,36.0,cattle,intron
-0.5645747780799866,NTv3 8M (pre),2.0,cattle,intron
-0.5765650272369385,NTv3 100M (pre),2.0,cattle,intron
-0.6140890121459961,NTv3 650M (pre),7.0,cattle,intron
-0.6709504723548889,NTv3 650M (pos),10.0,cattle,intron
-0.3665516376495361,NTv2 500M,88.0,cattle,exon
-0.323502242565155,BPNet arch. 6M,7.0,cattle,exon
-0.519285261631012,Residual CNN 44M,23.0,cattle,exon
-0.2514283955097198,HyenaDNA 7M,23.0,cattle,exon
-0.5072187781333923,Caduceus 7M,39.0,cattle,exon
-0.593974232673645,NTv3 8M (pre),1.0,cattle,exon
-0.6014777421951294,NTv3 100M (pre),2.0,cattle,exon
-0.6433462500572205,NTv3 650M (pre),7.0,cattle,exon
-0.6648420095443726,NTv3 650M (pos),9.0,cattle,exon
-0.0937248468399047,NTv2 500M,89.0,cattle,splice acceptor
-0.4435675740242004,BPNet arch. 6M,7.0,cattle,splice acceptor
-0.6590774655342102,Residual CNN 44M,23.0,cattle,splice acceptor
-0.1038060635328292,HyenaDNA 7M,21.0,cattle,splice acceptor
-0.6937510371208191,Caduceus 7M,38.0,cattle,splice acceptor
-0.7248824238777161,NTv3 8M (pre),1.0,cattle,splice acceptor
-0.7345820069313049,NTv3 100M (pre),2.0,cattle,splice acceptor
-0.7439091801643372,NTv3 650M (pre),7.0,cattle,splice acceptor
-0.758992075920105,NTv3 650M (pos),9.0,cattle,splice acceptor
-0.1127461418509483,NTv2 500M,88.0,cattle,start codon
-0.0901669710874557,BPNet arch. 6M,7.0,cattle,start codon
-0.3548502624034881,Residual CNN 44M,23.0,cattle,start codon
-0.0545537285506725,HyenaDNA 7M,24.0,cattle,start codon
-0.4038819670677185,Caduceus 7M,38.0,cattle,start codon
-0.5045616030693054,NTv3 8M (pre),1.0,cattle,start codon
-0.4762806594371795,NTv3 100M (pre),3.0,cattle,start codon
-0.5610686540603638,NTv3 650M (pre),7.0,cattle,start codon
-0.5782408118247986,NTv3 650M (pos),9.0,cattle,start codon
-0.1547228246927261,NTv2 500M,85.0,cattle,intron
-0.1383400112390518,BPNet arch. 6M,6.0,cattle,intron
-0.3266464471817016,Residual CNN 44M,23.0,cattle,intron
-0.4240079522132873,HyenaDNA 7M,23.0,cattle,intron
-0.4552704095840454,Caduceus 7M,37.0,cattle,intron
-0.5063548684120178,NTv3 8M (pre),1.0,cattle,intron
-0.5619235038757324,NTv3 100M (pre),3.0,cattle,intron
-0.531277596950531,NTv3 650M (pre),7.0,cattle,intron
-0.6205132603645325,NTv3 650M (pos),9.0,cattle,intron
-0.3413117229938507,NTv2 500M,87.0,cattle,exon
-0.2900931537151336,BPNet arch. 6M,7.0,cattle,exon
-0.4856111407279968,Residual CNN 44M,23.0,cattle,exon
-0.2246854901313781,HyenaDNA 7M,70.0,cattle,exon
-0.5370016098022461,Caduceus 7M,35.0,cattle,exon
-0.5721412897109985,NTv3 8M (pre),2.0,cattle,exon
-0.5819903612136841,NTv3 100M (pre),2.0,cattle,exon
-0.6183731555938721,NTv3 650M (pre),7.0,cattle,exon
-0.6233119964599609,NTv3 650M (pos),9.0,cattle,exon
-0.1367750316858291,NTv2 500M,89.0,cattle,splice acceptor
-0.4220209121704101,BPNet arch. 6M,7.0,cattle,splice acceptor
-0.689546525478363,Residual CNN 44M,23.0,cattle,splice acceptor
-0.1121769621968269,HyenaDNA 7M,69.0,cattle,splice acceptor
-0.7314619421958923,Caduceus 7M,37.0,cattle,splice acceptor
-0.74350905418396,NTv3 8M (pre),2.0,cattle,splice acceptor
-0.746654748916626,NTv3 100M (pre),2.0,cattle,splice acceptor
-0.7714020609855652,NTv3 650M (pre),7.0,cattle,splice acceptor
-0.7809271812438965,NTv3 650M (pos),9.0,cattle,splice acceptor
-0.0901266038417816,NTv2 500M,89.0,cattle,start codon
-0.0930091217160224,BPNet arch. 6M,6.0,cattle,start codon
-0.423166275024414,Residual CNN 44M,23.0,cattle,start codon
-0.1253955662250518,HyenaDNA 7M,72.0,cattle,start codon
-0.33419930934906,Caduceus 7M,37.0,cattle,start codon
-0.4639334082603454,NTv3 8M (pre),1.0,cattle,start codon
-0.5102551579475403,NTv3 100M (pre),2.0,cattle,start codon
-0.5866840481758118,NTv3 650M (pre),7.0,cattle,start codon
-0.588148832321167,NTv3 650M (pos),9.0,cattle,start codon
-0.4777896404266357,NTv2 500M,33.0,tomato,intron
-0.3216900527477264,BPNet arch. 6M,1.0,tomato,intron
-0.46840900182724,Residual CNN 44M,6.0,tomato,intron
-0.5251263380050659,PlantCAD2 88M,38.0,tomato,intron
-0.747674286365509,Evo2 1B,13.0,tomato,intron
-0.6858112812042236,NTv3 8M (pre),0.0,tomato,intron
-0.7038365006446838,NTv3 100M (pre),0.0,tomato,intron
-0.7481895685195923,NTv3 650M (pre),1.0,tomato,intron
-0.7458349466323853,NTv3 650M (pos),2.0,tomato,intron
-0.6147475838661194,NTv2 500M,33.0,tomato,exon
-0.4551227986812591,BPNet arch. 6M,1.0,tomato,exon
-0.5068296194076538,Residual CNN 44M,6.0,tomato,exon
-0.7256030440330505,PlantCAD2 88M,37.0,tomato,exon
-0.7006198763847351,Evo2 1B,14.0,tomato,exon
-0.7537696361541748,NTv3 8M (pre),0.0,tomato,exon
-0.7484462857246399,NTv3 100M (pre),0.0,tomato,exon
-0.764011561870575,NTv3 650M (pre),1.0,tomato,exon
-0.7750575542449951,NTv3 650M (pos),2.0,tomato,exon
-0.1691933125257492,NTv2 500M,33.0,tomato,splice acceptor
-0.125656172633171,BPNet arch. 6M,1.0,tomato,splice acceptor
-0.4359458982944488,Residual CNN 44M,6.0,tomato,splice acceptor
-0.744257926940918,PlantCAD2 88M,38.0,tomato,splice acceptor
-0.3791649639606476,Evo2 1B,13.0,tomato,splice acceptor
-0.6623862385749817,NTv3 8M (pre),0.0,tomato,splice acceptor
-0.6843105554580688,NTv3 100M (pre),0.0,tomato,splice acceptor
-0.7641868591308594,NTv3 650M (pre),1.0,tomato,splice acceptor
-0.7584431767463684,NTv3 650M (pos),2.0,tomato,splice acceptor
-0.132934883236885,NTv2 500M,34.0,tomato,start codon
-0.0,BPNet arch. 6M,1.0,tomato,start codon
-0.088478960096836,Residual CNN 44M,6.0,tomato,start codon
-0.2019559442996978,PlantCAD2 88M,38.0,tomato,start codon
-0.1622217148542404,Evo2 1B,13.0,tomato,start codon
-0.2966536581516266,NTv3 8M (pre),0.0,tomato,start codon
-0.3968957066535949,NTv3 100M (pre),0.0,tomato,start codon
-0.4830105900764465,NTv3 650M (pre),1.0,tomato,start codon
-0.5007501244544983,NTv3 650M (pos),2.0,tomato,start codon
-0.6770024299621582,NTv2 500M,33.0,tomato,intron
-0.2927957773208618,BPNet arch. 6M,2.0,tomato,intron
-0.557494580745697,Residual CNN 44M,8.0,tomato,intron
-0.7252154350280762,PlantCAD2 88M,46.0,tomato,intron
-0.712181031703949,NTv3 8M (pre),1.0,tomato,intron
-0.7515084147453308,NTv3 100M (pre),1.0,tomato,intron
-0.7400797009468079,NTv3 650M (pre),3.0,tomato,intron
-0.7532288432121277,NTv3 650M (pos),4.0,tomato,intron
-0.5751976370811462,NTv2 500M,33.0,tomato,exon
-0.3057552278041839,BPNet arch. 6M,3.0,tomato,exon
-0.5581462979316711,Residual CNN 44M,8.0,tomato,exon
-0.7699167728424072,PlantCAD2 88M,50.0,tomato,exon
-0.748009443283081,NTv3 8M (pre),1.0,tomato,exon
-0.7629056572914124,NTv3 100M (pre),1.0,tomato,exon
-0.7755228877067566,NTv3 650M (pre),3.0,tomato,exon
-0.782516598701477,NTv3 650M (pos),4.0,tomato,exon
-0.168193981051445,NTv2 500M,33.0,tomato,splice acceptor
-0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
-0.4833243191242218,Residual CNN 44M,8.0,tomato,splice acceptor
-0.7335307598114014,PlantCAD2 88M,46.0,tomato,splice acceptor
-0.6908777952194214,NTv3 8M (pre),1.0,tomato,splice acceptor
-0.7348777055740356,NTv3 100M (pre),1.0,tomato,splice acceptor
-0.7484620809555054,NTv3 650M (pre),3.0,tomato,splice acceptor
-0.7539154291152954,NTv3 650M (pos),4.0,tomato,splice acceptor
-0.1586925536394119,NTv2 500M,33.0,tomato,start codon
-0.0,BPNet arch. 6M,2.0,tomato,start codon
-0.1107296794652938,Residual CNN 44M,8.0,tomato,start codon
-0.3756755590438843,PlantCAD2 88M,48.0,tomato,start codon
-0.4113904237747192,NTv3 8M (pre),1.0,tomato,start codon
-0.4541433155536651,NTv3 100M (pre),1.0,tomato,start codon
-0.5002310872077942,NTv3 650M (pre),3.0,tomato,start codon
-0.5470007658004761,NTv3 650M (pos),4.0,tomato,start codon
-0.6712294220924377,NTv2 500M,33.0,tomato,intron
-0.3502058088779449,BPNet arch. 6M,2.0,tomato,intron
-0.5514466166496277,Residual CNN 44M,8.0,tomato,intron
-0.722817599773407,PlantCAD2 88M,88.0,tomato,intron
-0.7013162970542908,NTv3 8M (pre),1.0,tomato,intron
-0.747364342212677,NTv3 100M (pre),1.0,tomato,intron
-0.752423107624054,NTv3 650M (pre),3.0,tomato,intron
-0.7750566005706787,NTv3 650M (pos),4.0,tomato,intron
-0.6022632718086243,NTv2 500M,33.0,tomato,exon
-0.3020758032798767,BPNet arch. 6M,2.0,tomato,exon
-0.4746756553649902,Residual CNN 44M,8.0,tomato,exon
-0.7354215979576111,PlantCAD2 88M,45.0,tomato,exon
-0.7157281041145325,NTv3 8M (pre),1.0,tomato,exon
-0.7326820492744446,NTv3 100M (pre),1.0,tomato,exon
-0.7308483123779297,NTv3 650M (pre),3.0,tomato,exon
-0.7417197823524475,NTv3 650M (pos),4.0,tomato,exon
-0.1558358669281005,NTv2 500M,33.0,tomato,splice acceptor
-0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
-0.3391502797603607,Residual CNN 44M,8.0,tomato,splice acceptor
-0.7305923700332642,PlantCAD2 88M,85.0,tomato,splice acceptor
-0.6977006196975708,NTv3 8M (pre),1.0,tomato,splice acceptor
-0.6770275831222534,NTv3 100M (pre),1.0,tomato,splice acceptor
-0.6770390272140503,NTv3 650M (pre),3.0,tomato,splice acceptor
-0.7287323474884033,NTv3 650M (pos),4.0,tomato,splice acceptor
-0.1887903958559036,NTv2 500M,33.0,tomato,start codon
-0.0639578104019165,BPNet arch. 6M,2.0,tomato,start codon
-0.0914037525653839,Residual CNN 44M,8.0,tomato,start codon
-0.4881043434143066,PlantCAD2 88M,88.0,tomato,start codon
-0.4309621453285217,NTv3 8M (pre),1.0,tomato,start codon
-0.4028272926807403,NTv3 100M (pre),1.0,tomato,start codon
-0.4060510396957397,NTv3 650M (pre),3.0,tomato,start codon
-0.472331553697586,NTv3 650M (pos),4.0,tomato,start codon
-0.1995969861745834,NTv2 500M,72.0,human,intron
-0.0296161584556102,BPNet arch. 6M,3.0,human,intron
-0.2347834408283233,Residual CNN 44M,15.0,human,intron
-0.33451908826828,HyenaDNA 7M,17.0,human,intron
-0.4144788980484009,Caduceus 7M,27.0,human,intron
-0.0,Evo2 1B,34.0,human,intron
-0.4695742726325989,NTv3 8M (pre),1.0,human,intron
-0.475054919719696,NTv3 100M (pre),2.0,human,intron
-0.5504136681556702,NTv3 650M (pre),5.0,human,intron
-0.5643875002861023,NTv3 650M (pos),6.0,human,intron
-0.0546500161290168,NTv2 500M,72.0,human,exon
-0.2706590592861175,BPNet arch. 6M,3.0,human,exon
-0.2678671479225158,Residual CNN 44M,15.0,human,exon
-0.179698497056961,HyenaDNA 7M,19.0,human,exon
-0.5098947286605835,Caduceus 7M,26.0,human,exon
-0.4510694444179535,Evo2 1B,34.0,human,exon
-0.6089931726455688,NTv3 8M (pre),1.0,human,exon
-0.6492856740951538,NTv3 100M (pre),2.0,human,exon
-0.6975767016410828,NTv3 650M (pre),5.0,human,exon
-0.6822624206542969,NTv3 650M (pos),8.0,human,exon
-0.1493269056081771,NTv2 500M,73.0,human,splice acceptor
-0.3807527124881744,BPNet arch. 6M,3.0,human,splice acceptor
-0.6632664203643799,Residual CNN 44M,15.0,human,splice acceptor
-0.1002769619226455,HyenaDNA 7M,17.0,human,splice acceptor
-0.7357247471809387,Caduceus 7M,24.0,human,splice acceptor
-0.1821079105138778,Evo2 1B,34.0,human,splice acceptor
-0.7726271748542786,NTv3 8M (pre),1.0,human,splice acceptor
-0.77947598695755,NTv3 100M (pre),2.0,human,splice acceptor
-0.8028115034103394,NTv3 650M (pre),5.0,human,splice acceptor
-0.7979229092597961,NTv3 650M (pos),7.0,human,splice acceptor
-0.139576569199562,NTv2 500M,73.0,human,start codon
-0.1334401220083236,BPNet arch. 6M,3.0,human,start codon
-0.3876807987689972,Residual CNN 44M,15.0,human,start codon
-0.1003016158938407,HyenaDNA 7M,18.0,human,start codon
-0.3958532512187958,Caduceus 7M,24.0,human,start codon
-0.1399599611759185,Evo2 1B,34.0,human,start codon
-0.540923535823822,NTv3 8M (pre),1.0,human,start codon
-0.5464004278182983,NTv3 100M (pre),2.0,human,start codon
-0.6803378462791443,NTv3 650M (pre),5.0,human,start codon
-0.7310947179794312,NTv3 650M (pos),7.0,human,start codon
-0.0814515128731727,NTv2 500M,72.0,human,intron
-0.0172978900372982,BPNet arch. 6M,5.0,human,intron
-0.2740728259086609,Residual CNN 44M,19.0,human,intron
-0.3312098085880279,HyenaDNA 7M,21.0,human,intron
-0.5108950138092041,Caduceus 7M,33.0,human,intron
-0.5034915208816528,NTv3 8M (pre),1.0,human,intron
-0.5154411792755127,NTv3 100M (pre),2.0,human,intron
-0.5814740061759949,NTv3 650M (pre),6.0,human,intron
-0.5920455455780029,NTv3 650M (pos),8.0,human,intron
-0.3505669236183166,NTv2 500M,72.0,human,exon
-0.2252149283885955,BPNet arch. 6M,5.0,human,exon
-0.4010578095912933,Residual CNN 44M,18.0,human,exon
-0.1851459741592407,HyenaDNA 7M,18.0,human,exon
-0.4599409103393554,Caduceus 7M,33.0,human,exon
-0.5931490063667297,NTv3 8M (pre),1.0,human,exon
-0.6058318018913269,NTv3 100M (pre),2.0,human,exon
-0.6738048791885376,NTv3 650M (pre),6.0,human,exon
-0.6936564445495605,NTv3 650M (pos),8.0,human,exon
-0.1533636748790741,NTv2 500M,72.0,human,splice acceptor
-0.3751010596752167,BPNet arch. 6M,5.0,human,splice acceptor
-0.681228756904602,Residual CNN 44M,19.0,human,splice acceptor
-0.0252278540283441,HyenaDNA 7M,22.0,human,splice acceptor
-0.7485092878341675,Caduceus 7M,35.0,human,splice acceptor
-0.7772909998893738,NTv3 8M (pre),1.0,human,splice acceptor
-0.794090747833252,NTv3 100M (pre),2.0,human,splice acceptor
-0.8239933252334595,NTv3 650M (pre),6.0,human,splice acceptor
-0.804115891456604,NTv3 650M (pos),8.0,human,splice acceptor
-0.0851806029677391,NTv2 500M,72.0,human,start codon
-0.0,BPNet arch. 6M,5.0,human,start codon
-0.3292546272277832,Residual CNN 44M,19.0,human,start codon
-0.0647941380739212,HyenaDNA 7M,20.0,human,start codon
-0.4505241215229034,Caduceus 7M,33.0,human,start codon
-0.60422682762146,NTv3 8M (pre),1.0,human,start codon
-0.6015576124191284,NTv3 100M (pre),2.0,human,start codon
-0.6452956795692444,NTv3 650M (pre),6.0,human,start codon
-0.6761345267295837,NTv3 650M (pos),8.0,human,start codon
-0.0558800511062145,NTv2 500M,70.0,human,intron
-0.0185965970158576,BPNet arch. 6M,5.0,human,intron
-0.2623045742511749,Residual CNN 44M,18.0,human,intron
-0.3633092641830444,HyenaDNA 7M,68.0,human,intron
-0.4261827170848846,Caduceus 7M,29.0,human,intron
-0.4804849028587341,NTv3 8M (pre),1.0,human,intron
-0.482195496559143,NTv3 100M (pre),2.0,human,intron
-0.5425574779510498,NTv3 650M (pre),6.0,human,intron
-0.5443048477172852,NTv3 650M (pos),8.0,human,intron
-0.3958893716335296,NTv2 500M,71.0,human,exon
-0.2360571771860122,BPNet arch. 6M,5.0,human,exon
-0.3744256496429443,Residual CNN 44M,18.0,human,exon
-0.1936572045087814,HyenaDNA 7M,68.0,human,exon
-0.5046994090080261,Caduceus 7M,29.0,human,exon
-0.6339762210845947,NTv3 8M (pre),1.0,human,exon
-0.6433913111686707,NTv3 100M (pre),2.0,human,exon
-0.6518793702125549,NTv3 650M (pre),6.0,human,exon
-0.6812491416931152,NTv3 650M (pos),8.0,human,exon
-0.1248077526688575,NTv2 500M,70.0,human,splice acceptor
-0.3842235207557678,BPNet arch. 6M,5.0,human,splice acceptor
-0.6810190081596375,Residual CNN 44M,18.0,human,splice acceptor
-0.0527583621442317,HyenaDNA 7M,17.0,human,splice acceptor
-0.7072214484214783,Caduceus 7M,29.0,human,splice acceptor
-0.7796080708503723,NTv3 8M (pre),1.0,human,splice acceptor
-0.7596970200538635,NTv3 100M (pre),2.0,human,splice acceptor
-0.7915040850639343,NTv3 650M (pre),6.0,human,splice acceptor
-0.7957100868225098,NTv3 650M (pos),8.0,human,splice acceptor
-0.1267423331737518,NTv2 500M,70.0,human,start codon
-0.1114460304379463,BPNet arch. 6M,5.0,human,start codon
-0.3342535495758056,Residual CNN 44M,18.0,human,start codon
-0.1215013489127159,HyenaDNA 7M,18.0,human,start codon
-0.4082835018634796,Caduceus 7M,29.0,human,start codon
-0.5167152881622314,NTv3 8M (pre),1.0,human,start codon
-0.5340564250946045,NTv3 100M (pre),2.0,human,start codon
-0.6148532032966614,NTv3 650M (pre),6.0,human,start codon
-0.6582212448120117,NTv3 650M (pos),8.0,human,start codon
-0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
-0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
-0.6582212448120117,Caduceus 7M,8.0,human,start codon
-0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
-0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
-0.6582212448120117,Caduceus 7M,8.0,human,start codon
-0.6582212448120117,NTv3 650M (pre),8.0,human,start codon
-0.6582212448120117,BPNet arch. 6M,8.0,human,start codon
-0.6582212448120117,Caduceus 7M,8.0,human,start codon

data/bigwig_dataset.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

data/ntv3_benchmark_results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

src/streamlit_app.py CHANGED Viewed

@@ -82,6 +82,7 @@ ASSAY_COLORS = {
     "splice acceptor": '#ff9900',
     "start codon": '#9933cc',
 }
 MODEL_COLORS = {
     "NTv3 650M (pos)": COLORS['blue_0'],
@@ -93,7 +94,8 @@ MODEL_COLORS = {
     "BPNet arch. 6M": COLORS['cyan_1'],
     "Residual CNN 44M":  COLORS['magenta_1'],
     "PlantCAD2 88M": COLORS["purple_1"],
-    "Caduceus 7M": COLORS["purple_2"]
 }
 MODEL_TRAINING_STATUS = {
@@ -107,6 +109,7 @@ MODEL_TRAINING_STATUS = {
     "NTv2 500M": "PRE",
     "BPNet arch. 6M": "SCRATCH",
     "PlantCAD2 88M": "PRE",
 }
 MODEL_GPU_MULTIPLIER = {
@@ -152,8 +155,7 @@ HERE = os.path.dirname(os.path.abspath(__file__))  # /app/src
 PROJECT_ROOT = os.path.dirname(HERE)               # /app
 DATA_DIR = os.path.join(PROJECT_ROOT, "data")
-PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
-MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
 # ---------------------------------------------------------------------
 # Data loading & preprocessing
@@ -162,122 +164,108 @@ MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
 @st.cache_data
 def load_raw_data():
-    pearson_df = pd.read_csv(PEARSON_PATH)
-    mcc_df = pd.read_csv(MCC_PATH)
-    pearson_df.columns = [c.strip() for c in pearson_df.columns]
-    mcc_df.columns = [c.strip() for c in mcc_df.columns]
-    return pearson_df, mcc_df
-def _normalize_training_hours(df: pd.DataFrame) -> pd.DataFrame:
-    return df.rename(columns={"running_time_hours": "GPU hours"})
-@st.cache_data
-def load_expanded_data():
     """
-    Load data in the new format where each row is already:
-      (species, [assay_type], datasets, model_name, metric)
-    and convert into a unified schema:
-      species, assay_type?, datasets, Model, Score
-    For Pearson:
-      If multiple rows share (species, assay_type, datasets, Model),
-      we average their Score.
-    For MCC:
-      If multiple rows share (species, datasets, Model),
-      we average their Score.
     """
-    pearson_df, mcc_df = load_raw_data()
-    pearson_df = _normalize_training_hours(pearson_df)
-    mcc_df = _normalize_training_hours(mcc_df)
-    #if "track_name_clean" in pearson_df.columns:
-    #    pearson_df = pearson_df.drop(columns=["datasets"], errors="ignore")
-    #    pearson_df = pearson_df.rename(columns={"track_name_clean": "datasets"})
-    # --- Pearson correlations ---
-    # Expect columns: species, assay_type, datasets, model_name, pearson correlation
-    pearson_df = pearson_df.rename(
-        columns={
-            "model_name": "Model",
-            "pearson correlation": "Score",
-        }
-    )
-    # --- Keep track_name_clean available (for head-to-head only later) ---
-    pearson_track_map = None
-    if "track_name_clean" in pearson_df.columns:
-        map_keys = ["species", "datasets"]
-        if "assay_type" in pearson_df.columns:
-            map_keys.append("assay_type")
-        pearson_track_map = (
-            pearson_df[map_keys + ["track_name_clean"]]
-            .dropna(subset=["track_name_clean"])
-            .drop_duplicates()
-    )
     pearson_group_cols = ["species", "datasets", "Model"]
-    if "assay_type" in pearson_df.columns:
         pearson_group_cols.append("assay_type")
-    agg_cols = {"Score": "mean"}
-    if "GPU hours" in pearson_df.columns:
-        agg_cols["GPU hours"] = "mean"
-    # --- after aggregation ---
     pearson_df = (
-        pearson_df
         .groupby(pearson_group_cols, as_index=False, dropna=False)
-        .agg(agg_cols)
     )
-    # ✅ Merge track_name_clean back FIRST (assay_type still raw here)
-    if pearson_track_map is not None:
-        pearson_df = pearson_df.merge(pearson_track_map, on=map_keys, how="left")
-    # ✅ THEN map assay_type to your categories
     if "assay_type" in pearson_df.columns:
         pearson_df["assay_type"] = (
-            pearson_df["assay_type"]
-            .map(ASSAY_TYPE_MAPPING)
-            .fillna("Other")
         )
-    # --- MCC (bed tracks) ---
-    # Expect columns: species, datasets, model_name, MCC
-    mcc_df = mcc_df.rename(
-        columns={
-            "model_name": "Model",
-            "MCC": "Score",
-        }
-    )
-    # Collapse duplicates with same (species, datasets, Model)
-    mcc_group_cols = ["species", "datasets", "Model"]
-    agg_cols = {"Score": "mean"}
-    if "GPU hours" in mcc_df.columns:
-        agg_cols["GPU hours"] = "mean"
     mcc_df = (
-        mcc_df
-        .groupby(mcc_group_cols, as_index=False, dropna=False)
-        .agg(agg_cols)
     )
-    # Optional sanity checks
-    for df_name, df in [("pearson", pearson_df), ("mcc", mcc_df)]:
-        required = {"species", "datasets", "Model", "Score"}
-        missing = required - set(df.columns)
-        if missing:
-            st.error(f"{df_name} dataframe missing columns: {missing}")
     return pearson_df, mcc_df
@@ -510,45 +498,6 @@ def plot_breakdown_facets_sorted_models(
     return fig
-def build_radar_df(
-    benchmark_name: str,
-    selected_species: List[str],
-    selected_assays: List[str],
-    selected_models: List[str],
-    selected_datasets: List[str],
-) -> pd.DataFrame:
-    cfg = _BENCHMARKS[benchmark_name]
-    df = filter_base_df(
-        benchmark_name,
-        selected_species,
-        selected_assays,
-        selected_models,
-        selected_datasets,
-    )
-    if df.empty:
-        return pd.DataFrame()
-    # Choose axis column
-    if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
-        axis_col = "assay_type"
-        axis_label = "Assay type"
-    else:
-        axis_col = "datasets"
-        axis_label = "Dataset"
-    radar_df = (
-        df.groupby([axis_col, "Model"], as_index=False)["Score"]
-        .mean()
-        .rename(columns={axis_col: "Axis", "Score": "Value"})
-    )
-    radar_df.attrs["axis_label"] = axis_label
-    return radar_df
 def build_pairwise_scatter_df(
     benchmark_name: str,
     selected_species: List[str],
@@ -558,19 +507,8 @@ def build_pairwise_scatter_df(
     model_a: str,
     model_b: str,
 ) -> pd.DataFrame:
-    """
-    Returns a per-track dataframe with columns:
-      Track, Model A, Model B, (optional) species, (optional) assay_type, datasets
-    Where each row corresponds to a specific track (datasets [+ assay_type]).
-    Special case:
-      If `track_name_clean` exists (typically for bigwig Functional Tracks),
-      we use it ONLY for the head-to-head "Track" label (and track identity),
-      while keeping the rest of the app using `datasets`.
-    """
     cfg = _BENCHMARKS[benchmark_name]
-    # Ensure chosen models are included even if toggles exclude them
     models_for_filter = (
         list(set(selected_models + [model_a, model_b]))
         if selected_models else [model_a, model_b]
@@ -583,24 +521,18 @@ def build_pairwise_scatter_df(
         models_for_filter,
         selected_datasets,
     )
     if df.empty:
         return pd.DataFrame()
-    # Prefer track_name_clean for BigWig head-to-head labeling ONLY
-    # (fallback to datasets if missing)
-    track_id_col = "track_name_clean" if "track_name_clean" in df.columns else "datasets"
-    # Define what “a specific track” means
-    track_cols = [track_id_col]
     if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
-        track_cols = ["assay_type", track_id_col]
-    # Keep species in hover if multiple are selected
     keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
     id_cols = (["species"] if keep_species else []) + track_cols
-    # Pivot into two model columns
     wide = (
         df[df["Model"].isin([model_a, model_b])]
         .pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
@@ -612,26 +544,28 @@ def build_pairwise_scatter_df(
     wide = wide.dropna(subset=[model_a, model_b])
-    # Create a nice "Track" label for display (uses track_name_clean if available)
     if "assay_type" in wide.columns:
-        wide["Track"] = wide["assay_type"].astype(str) + " / " + wide[track_id_col].astype(str)
     else:
-        wide["Track"] = wide[track_id_col].astype(str)
-    # Rename for plotting
     wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
-    # If we used track_name_clean, keep datasets around too (if present) for hover/debug
-    # (nothing breaks if it's absent)
-    if track_id_col == "track_name_clean" and "datasets" in df.columns and "datasets" not in wide.columns:
-        # merge back datasets for hover only
-        merge_keys = id_cols.copy()
-        extra = df[merge_keys + ["datasets"]].drop_duplicates()
-        wide = wide.merge(extra, on=merge_keys, how="left")
     return wide
 def build_violin_df(
     benchmark_name: str,
     selected_species: List[str],
@@ -655,75 +589,15 @@ def build_violin_df(
     return df[keep].copy()
-def plot_radar(
-    radar_df: pd.DataFrame,
-    metric_label: str,
-    height: int = 600,
-):
-    if radar_df.empty:
-        return None
-    axes = radar_df["Axis"].unique().tolist()
-    # Global radial range
-    r_min = radar_df["Value"].min()
-    r_max = radar_df["Value"].max()
-    pad = 0.05 * (r_max - r_min if r_max > r_min else 1.0)
-    r_range = [r_min - pad, r_max + pad]
-    fig = go.Figure()
-    for model in radar_df["Model"].unique():
-        sub = radar_df[radar_df["Model"] == model]
-        # Ensure consistent axis ordering
-        sub = sub.set_index("Axis").reindex(axes)
-        fig.add_trace(
-            go.Scatterpolar(
-                r=sub["Value"],
-                theta=axes,
-                fill="toself",
-                name=model,
-                line_color=MODEL_COLORS.get(model),
-                opacity=0.75,
-            )
-        )
-    fig.update_layout(
-        height=height,
-        polar=dict(
-            bgcolor="rgba(0,0,0,0)",          # 👈 polar background
-            radialaxis=dict(
-                title=metric_label,
-                range=r_range,
-                tickformat=".2f",
-                showgrid=True,
-                gridcolor="rgba(0,0,0,0.15)", # subtle grid
-            ),
-            angularaxis=dict(
-                showgrid=True,
-                gridcolor="rgba(0,0,0,0.15)",
-            ),
-        ),
-        paper_bgcolor="rgba(0,0,0,0)",        # 👈 entire figure background
-        plot_bgcolor="rgba(0,0,0,0)",         # 👈 plot area
-        showlegend=True,
-        legend_title_text="Model",
-        margin=dict(t=40, b=40, l=40, r=40),
-    )
-    return fig
 def build_convergence_df(
     benchmark_name: str,
     selected_species: List[str],
     selected_assays: List[str],
     selected_models: List[str],
     selected_datasets: List[str],
 ) -> pd.DataFrame:
     df = filter_base_df(
         benchmark_name,
         selected_species,
@@ -732,25 +606,58 @@ def build_convergence_df(
         selected_datasets,
     )
-    if df.empty or "GPU hours" not in df.columns:
-        return pd.DataFrame(columns=["Model", "GPU hours", "Performance"])
     out = (
         df.groupby("Model", as_index=False)
-        .agg({"Score": "mean", "GPU hours": "mean"})
         .rename(columns={"Score": "Performance"})
     )
-    # Apply per-model multiplier (default 1)
-    out["GPU multiplier"] = out["Model"].map(MODEL_GPU_MULTIPLIER).fillna(1).astype(float)
-    out["GPU hours"] = out["GPU hours"] * out["GPU multiplier"]
-    out = out.dropna(subset=["GPU hours", "Performance"])
     out["Performance"] = out["Performance"].round(3)
-    out["GPU hours"] = out["GPU hours"].round(1)
     return out
 # ---------------------------------------------------------------------
 # UI helpers
 # ---------------------------------------------------------------------
@@ -893,7 +800,7 @@ def main():
             )
             fig.update_layout(
                 barmode="group",
-                height=480,
                 xaxis_title="",
                 yaxis_title=cfg["metric_label"],
                 plot_bgcolor="rgba(0,0,0,0)",
@@ -970,11 +877,9 @@ def main():
             pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
             axis_range = [min_v - pad, max_v + pad]
             tick_step = (axis_range[1] - axis_range[0]) / 5
-            hover_cols = []
-            # Prefer track_name_clean; fall back to Track if not present
-            if "track_name_clean" in scatter_df.columns:
                 hover_cols.append("track_name_clean")
             else:
                 hover_cols.append("datasets")
@@ -1038,49 +943,57 @@ def main():
     with right:
         st.markdown("#### ⏱️ Time to convergence")
         conv_df = build_convergence_df(
             benchmark_name,
             selected_species,
             selected_assays,
             selected_models,
             selected_datasets,
         )
         if conv_df.empty:
-            st.info("No training-time data found for the selected filters (missing 'GPU hours').")
         else:
             fig_conv = px.scatter(
                 conv_df,
-                x="GPU hours",
                 y="Performance",
                 text="Model",
-                color="Model",                       # 👈 color by model
-                color_discrete_map=MODEL_COLORS,     # 👈 your palette
-                hover_data=["Model", "GPU hours", "Performance"],
             )
-            fig_conv.update_traces(textposition="top center")
             fig_conv.update_layout(
-                height=630,
-                xaxis=dict(title="GPU hours", type="log"),
-                yaxis=dict(title=cfg["metric_label"]),
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
-                showlegend=False,
             )
-            fig_conv.update_xaxes(
                 type="log",
-                range=[0, np.log10(conv_df["GPU hours"].max())],  # log10(1) = 0
-                title="GPU hours (log scale)",
             )
-            # optional: hide legend if labels already on points
-            # fig_conv.update_layout(showlegend=False)
             st.plotly_chart(fig_conv, use_container_width=True)
     # ------------------------------------------------------------------
     # Violin (full width, below)
     # ------------------------------------------------------------------

     "splice acceptor": '#ff9900',
     "start codon": '#9933cc',
 }
+ASSAY_COLORS["Other"] = "#808080"
 MODEL_COLORS = {
     "NTv3 650M (pos)": COLORS['blue_0'],
     "BPNet arch. 6M": COLORS['cyan_1'],
     "Residual CNN 44M":  COLORS['magenta_1'],
     "PlantCAD2 88M": COLORS["purple_1"],
+    "Caduceus 7M": COLORS["purple_2"],
+    "HyenaDNA 7M": COLORS["yellow_2"]
 }
 MODEL_TRAINING_STATUS = {
     "NTv2 500M": "PRE",
     "BPNet arch. 6M": "SCRATCH",
     "PlantCAD2 88M": "PRE",
+    "HyenaDNA 7M": "PRE"
 }
 MODEL_GPU_MULTIPLIER = {
 PROJECT_ROOT = os.path.dirname(HERE)               # /app
 DATA_DIR = os.path.join(PROJECT_ROOT, "data")
+SINGLE_TABLE_PATH = os.path.join(DATA_DIR, "ntv3_benchmark_results.csv")
 # ---------------------------------------------------------------------
 # Data loading & preprocessing
 # Read the consolidated results table at SINGLE_TABLE_PATH into a DataFrame
 # and strip leading/trailing whitespace from every column name (presumably so
 # that stray spaces in the CSV header do not break later column lookups).
 # The function is wrapped in st.cache_data.
 @st.cache_data
 def load_raw_data():
+    df = pd.read_csv(SINGLE_TABLE_PATH)
+    df.columns = [c.strip() for c in df.columns]
+    return df
+def _normalize_training_time_to_gpu_hours(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Your new column is `running_time`. In your sample it looks like seconds
+    (e.g. 317034 ~= 88 hours). We'll convert to hours if values look like seconds.
+    """
+    if "running_time" not in df.columns:
+        return df
+    rt = pd.to_numeric(df["running_time"], errors="coerce")
+    # Heuristic: if median is huge, it's probably seconds -> convert to hours
+    # (88 hours = 316800 seconds is a typical-looking value in your sample)
+    if rt.dropna().median() > 10_000:
+        df["GPU hours"] = rt / 3600.0
+    else:
+        df["GPU hours"] = rt.astype(float)
+    return df
+def _best_step_time_to_hours(s: pd.Series) -> pd.Series:
     """
+    Converts strings like '3 days 04:26:26.467000' to hours (float).
+    Works with pandas Timedelta parsing.
     """
+    td = pd.to_timedelta(s, errors="coerce")
+    return td.dt.total_seconds() / 3600.0
@st.cache_data
def load_expanded_data():
    """Split the raw results into aggregated Pearson and MCC tables.

    Rows whose ``assay_type`` equals ``"Annotation"`` feed the Genome
    Annotation (MCC) table; every other row feeds the Functional Tracks
    (Pearson) table. Each table holds the mean ``Score``, ``best_step`` and
    ``best_step_time_hours`` per group. When the raw table has no
    ``assay_type`` column, all rows are treated as functional tracks.

    Returns:
        ``(pearson_df, mcc_df)`` -- the Pearson table is grouped by species,
        datasets, Model (and assay_type when present) and carries
        ``track_name_clean`` when the raw table provides it; the MCC table is
        grouped by species, datasets and Model.
    """
    df = load_raw_data().copy()
    df = df.rename(columns={"Metric": "Score", "model_name": "Model"})
    df["Score"] = pd.to_numeric(df["Score"], errors="coerce")

    # Both convergence columns are aggregated below, so they must always
    # exist; fall back to NaN when the source table does not provide them.
    if "best_step" in df.columns:
        df["best_step"] = pd.to_numeric(df["best_step"], errors="coerce")
    else:
        df["best_step"] = np.nan
    if "best_step_time" in df.columns:
        df["best_step_time_hours"] = _best_step_time_to_hours(df["best_step_time"])
    else:
        df["best_step_time_hours"] = np.nan

    has_assay_type = "assay_type" in df.columns
    if has_assay_type:
        is_annot = df["assay_type"].astype(str).eq("Annotation")
    else:
        # No assay information: nothing can be identified as annotation.
        is_annot = pd.Series(False, index=df.index)

    pearson_raw = df[~is_annot].copy()
    mcc_raw = df[is_annot].copy()

    metric_aggs = {
        "Score": "mean",
        "best_step": "mean",
        "best_step_time_hours": "mean",
    }

    # -------------------------
    # Functional Tracks (Pearson)
    # -------------------------
    pearson_group_cols = ["species", "datasets", "Model"]
    if has_assay_type:
        pearson_group_cols.append("assay_type")

    pearson_df = (
        pearson_raw
        .groupby(pearson_group_cols, as_index=False, dropna=False)
        .agg(metric_aggs)
    )

    # Attach track_name_clean while assay_type still holds its raw values,
    # because the lookup table is keyed on them.
    if "track_name_clean" in pearson_raw.columns:
        map_keys = ["species", "datasets"]
        if has_assay_type:
            map_keys.append("assay_type")

        track_map = (
            pearson_raw[map_keys + ["track_name_clean"]]
            .dropna(subset=["track_name_clean"])
            # One name per key: otherwise the left merge would multiply the
            # aggregated rows for any track that has several clean names.
            .drop_duplicates(subset=map_keys)
        )
        pearson_df = pearson_df.merge(track_map, on=map_keys, how="left")

    # Only now collapse raw assay types into display categories.
    if has_assay_type:
        pearson_df["assay_type"] = (
            pearson_df["assay_type"].astype(str).map(ASSAY_TYPE_MAPPING).fillna("Other")
        )

    # -------------------------
    # Genome Annotation (MCC)
    # -------------------------
    mcc_df = (
        mcc_raw
        .groupby(["species", "datasets", "Model"], as_index=False, dropna=False)
        .agg(metric_aggs)
    )

    return pearson_df, mcc_df
     return fig
 def build_pairwise_scatter_df(
     benchmark_name: str,
     selected_species: List[str],
     model_a: str,
     model_b: str,
 ) -> pd.DataFrame:
     cfg = _BENCHMARKS[benchmark_name]
     models_for_filter = (
         list(set(selected_models + [model_a, model_b]))
         if selected_models else [model_a, model_b]
         models_for_filter,
         selected_datasets,
     )
     if df.empty:
         return pd.DataFrame()
+    # ---- define "track identity" for head-to-head ----
+    # Always use datasets for the identity (x/y points)
+    track_cols = ["datasets"]
     if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
+        track_cols = ["assay_type", "datasets"]
     keep_species = "species" in df.columns and (selected_species is None or len(selected_species) != 1)
     id_cols = (["species"] if keep_species else []) + track_cols
     wide = (
         df[df["Model"].isin([model_a, model_b])]
         .pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
     wide = wide.dropna(subset=[model_a, model_b])
+    # Nice display label: use datasets (not track_name_clean)
     if "assay_type" in wide.columns:
+        wide["Track"] = wide["assay_type"].astype(str) + " / " + wide["datasets"].astype(str)
     else:
+        wide["Track"] = wide["datasets"].astype(str)
     wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})
+    # ---- Pearson-only: merge track_name_clean for hover ----
+    if benchmark_name == "Functional Tracks" and "track_name_clean" in df.columns:
+        merge_keys = id_cols.copy()  # species? + assay_type? + datasets
+        track_map = (
+            df[merge_keys + ["track_name_clean"]]
+            .dropna(subset=["track_name_clean"])
+            .drop_duplicates()
+        )
+        wide = wide.merge(track_map, on=merge_keys, how="left")
     return wide
 def build_violin_df(
     benchmark_name: str,
     selected_species: List[str],
     return df[keep].copy()
 def build_convergence_df(
     benchmark_name: str,
     selected_species: List[str],
     selected_assays: List[str],
     selected_models: List[str],
     selected_datasets: List[str],
+    x_mode: str = "best_step",  # "best_step" | "best_step_time"
 ) -> pd.DataFrame:
     df = filter_base_df(
         benchmark_name,
         selected_species,
         selected_datasets,
     )
+    if df.empty:
+        return pd.DataFrame(columns=["Model", "X", "Performance"])
+    # Mean performance per model
     out = (
         df.groupby("Model", as_index=False)
+        .agg({"Score": "mean"})
         .rename(columns={"Score": "Performance"})
     )
+    # -------------------------
+    # X axis selection
+    # -------------------------
+    if x_mode == "Steps":
+        if "best_step" not in df.columns:
+            return pd.DataFrame(columns=["Model", "X", "Performance"])
+        x = (
+            df.groupby("Model", as_index=False)["best_step"]
+            .mean()
+            .rename(columns={"best_step": "X"})
+        )
+    else:  # best_step_time (GPU hours)
+        if "best_step_time_hours" not in df.columns:
+            return pd.DataFrame(columns=["Model", "X", "Performance"])
+        x = (
+            df.groupby("Model", as_index=False)["best_step_time_hours"]
+            .mean()
+            .rename(columns={"best_step_time_hours": "X"})
+        )
+        # 👇 Apply GPU multiplier (Evo2 uses 8 GPUs)
+        gpu_multiplier = {
+            "Evo2 1B": 8,
+        }
+        x["X"] = x.apply(
+            lambda r: r["X"] * gpu_multiplier.get(r["Model"], 1),
+            axis=1,
+        )
+    # Merge + clean
+    out = out.merge(x, on="Model", how="left")
+    out = out.dropna(subset=["X", "Performance"])
     out["Performance"] = out["Performance"].round(3)
     return out
 # ---------------------------------------------------------------------
 # UI helpers
 # ---------------------------------------------------------------------
             )
             fig.update_layout(
                 barmode="group",
+                height=500,
                 xaxis_title="",
                 yaxis_title=cfg["metric_label"],
                 plot_bgcolor="rgba(0,0,0,0)",
             pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
             axis_range = [min_v - pad, max_v + pad]
             tick_step = (axis_range[1] - axis_range[0]) / 5
+            hover_cols = ["datasets"]
+            if benchmark_name == "Functional Tracks" and "track_name_clean" in scatter_df.columns:
                 hover_cols.append("track_name_clean")
             else:
                 hover_cols.append("datasets")
     with right:
         st.markdown("#### ⏱️ Time to convergence")
+        x_mode = st.selectbox(
+            "X-axis",
+            options=["GPU hours", "Steps"],
+            index=0,
+            key=f"conv_x_mode_{benchmark_name}",
+        )
         conv_df = build_convergence_df(
             benchmark_name,
             selected_species,
             selected_assays,
             selected_models,
             selected_datasets,
+            x_mode=x_mode,
         )
         if conv_df.empty:
+            st.info("No convergence data found for the selected filters / x-axis mode.")
         else:
             fig_conv = px.scatter(
                 conv_df,
+                x="X",
                 y="Performance",
                 text="Model",
+                color="Model",
+                color_discrete_map=MODEL_COLORS,
+                hover_data=["Model", "X", "Performance"],
             )
             fig_conv.update_layout(
+                height=550,
+                xaxis_title=("GPU hours" if x_mode == "GPU hours" else x_mode),
+                yaxis_title=cfg["metric_label"],
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
+                showlegend=False,  # ✅ no legend
+            )
+            fig_conv.update_traces(
+                marker=dict(size=14),          # 👈 bigger dots
+                textposition="top center",
             )
+            # Log scale only makes sense for hours (and sometimes best_step)
+            if x_mode in ["GPU hours"]:
+                fig_conv.update_xaxes(
                 type="log",
+                dtick=1,
+                minor=dict(ticks="", showgrid=False),
             )
             st.plotly_chart(fig_conv, use_container_width=True)
     # ------------------------------------------------------------------
     # Violin (full width, below)
     # ------------------------------------------------------------------