Spaces:

InstaDeepAI
/

ntv3_benchmark

Running

App Files Files Community

MidAtBest commited on Dec 16, 2025

Commit

48b1d06

1 Parent(s): e9035bc

feat: remove radar plot and improve layout

Browse files

Files changed (4) hide show

Dockerfile +1 -0
data/bed_dataset.csv +322 -247
data/bigwig_dataset.csv +0 -0
src/streamlit_app.py +188 -90

Dockerfile CHANGED Viewed

@@ -1,5 +1,6 @@
 FROM python:3.13.5-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \

 FROM python:3.13.5-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \

data/bed_dataset.csv CHANGED Viewed

@@ -1,247 +1,322 @@
-MCC,model_name,species,datasets
-0.334637850522995,NTv2 500M,cattle,intron
-0.1238768473267555,BPNet arch. 6M,cattle,intron
-0.383470207452774,Residual CNN 44M,cattle,intron
-0.3828243613243103,HyenaDNA 7M,cattle,intron
-0.4733810424804687,Caduceus 7M,cattle,intron
-0.4315277338027954,Evo2 1B,cattle,intron
-0.5455867648124695,NTv3 8M (pre),cattle,intron
-0.5453664064407349,NTv3 100M (pre),cattle,intron
-0.5628412365913391,NTv3 650M (pre),cattle,intron
-0.5682631134986877,NTv3 650M (post),cattle,intron
-0.3689357042312622,NTv2 500M,cattle,exon
-0.3250860869884491,BPNet arch. 6M,cattle,exon
-0.4674676060676574,Residual CNN 44M,cattle,exon
-0.2207767516374588,HyenaDNA 7M,cattle,exon
-0.4960922300815582,Caduceus 7M,cattle,exon
-0.4969632029533386,Evo2 1B,cattle,exon
-0.5432836413383484,NTv3 8M (pre),cattle,exon
-0.5531933307647705,NTv3 100M (pre),cattle,exon
-0.591151773929596,NTv3 650M (pre),cattle,exon
-0.6253225207328796,NTv3 650M (post),cattle,exon
-0.118808165192604,NTv2 500M,cattle,splice acceptor
-0.4715546369552612,BPNet arch. 6M,cattle,splice acceptor
-0.6620649099349976,Residual CNN 44M,cattle,splice acceptor
-0.104436807334423,HyenaDNA 7M,cattle,splice acceptor
-0.7064619660377502,Caduceus 7M,cattle,splice acceptor
-0.2085049450397491,Evo2 1B,cattle,splice acceptor
-0.7254849076271057,NTv3 8M (pre),cattle,splice acceptor
-0.7404072880744934,NTv3 100M (pre),cattle,splice acceptor
-0.7732946872711182,NTv3 650M (pre),cattle,splice acceptor
-0.7679624557495117,NTv3 650M (post),cattle,splice acceptor
-0.1412438601255417,NTv2 500M,cattle,start codon
-0.1490814685821533,BPNet arch. 6M,cattle,start codon
-0.3243320286273956,Residual CNN 44M,cattle,start codon
-0.056509330868721,HyenaDNA 7M,cattle,start codon
-0.3455557227134704,Caduceus 7M,cattle,start codon
-0.1030694246292114,Evo2 1B,cattle,start codon
-0.5275959968566895,NTv3 8M (pre),cattle,start codon
-0.4962065815925598,NTv3 100M (pre),cattle,start codon
-0.5591813921928406,NTv3 650M (pre),cattle,start codon
-0.5492052435874939,NTv3 650M (post),cattle,start codon
-0.5492052435874939,NTv2 500M,cattle,start codon
-0.1015273928642273,BPNet arch. 6M,cattle,intron
-0.3299930691719055,Residual CNN 44M,cattle,intron
-0.3826011121273041,HyenaDNA 7M,cattle,intron
-0.5564854741096497,Caduceus 7M,cattle,intron
-0.5564854741096497,NTv2 500M,cattle,intron
-0.323502242565155,BPNet arch. 6M,cattle,exon
-0.519285261631012,Residual CNN 44M,cattle,exon
-0.1038060635328292,HyenaDNA 7M,cattle,splice acceptor
-0.1038060635328292,Caduceus 7M,cattle,splice acceptor
-0.1038060635328292,NTv2 500M,cattle,splice acceptor
-0.4435675740242004,BPNet arch. 6M,cattle,splice acceptor
-0.6590774655342102,Residual CNN 44M,cattle,splice acceptor
-0.1038060635328292,HyenaDNA 7M,cattle,splice acceptor
-0.1038060635328292,Caduceus 7M,cattle,splice acceptor
-0.1038060635328292,NTv2 500M,cattle,splice acceptor
-0.0901669710874557,BPNet arch. 6M,cattle,start codon
-0.3548502624034881,Residual CNN 44M,cattle,start codon
-0.0545537285506725,HyenaDNA 7M,cattle,start codon
-0.0545537285506725,Caduceus 7M,cattle,start codon
-0.0639578104019165,BPNet arch. 6M,cattle,start codon
-0.3266464471817016,Residual CNN 44M,cattle,intron
-0.3266464471817016,HyenaDNA 7M,cattle,intron
-0.3266464471817016,Caduceus 7M,cattle,intron
-0.1383400112390518,BPNet arch. 6M,cattle,intron
-0.4856111407279968,Residual CNN 44M,cattle,exon
-0.4856111407279968,HyenaDNA 7M,cattle,exon
-0.4856111407279968,Caduceus 7M,cattle,exon
-0.4220209121704101,BPNet arch. 6M,cattle,splice acceptor
-0.689546525478363,Residual CNN 44M,cattle,splice acceptor
-0.689546525478363,HyenaDNA 7M,cattle,splice acceptor
-0.689546525478363,Caduceus 7M,cattle,splice acceptor
-0.0930091217160224,BPNet arch. 6M,cattle,start codon
-0.423166275024414,Residual CNN 44M,cattle,start codon
-0.423166275024414,HyenaDNA 7M,cattle,start codon
-0.423166275024414,Caduceus 7M,cattle,start codon
-0.4777896404266357,NTv2 500M,tomato,intron
-0.3216900527477264,BPNet arch. 6M,tomato,intron
-0.46840900182724,Residual CNN 44M,tomato,intron
-0.5251263380050659,PlantCAD2 88M,tomato,intron
-0.747674286365509,Evo2 1B,tomato,intron
-0.6858112812042236,NTv3 8M (pre),tomato,intron
-0.7038365006446838,NTv3 100M (pre),tomato,intron
-0.7481895685195923,NTv3 650M (pre),tomato,intron
-0.7458349466323853,NTv3 650M (post),tomato,intron
-0.6147475838661194,NTv2 500M,tomato,exon
-0.4551227986812591,BPNet arch. 6M,tomato,exon
-0.5068296194076538,Residual CNN 44M,tomato,exon
-0.7256030440330505,PlantCAD2 88M,tomato,exon
-0.7006198763847351,Evo2 1B,tomato,exon
-0.7537696361541748,NTv3 8M (pre),tomato,exon
-0.7484462857246399,NTv3 100M (pre),tomato,exon
-0.764011561870575,NTv3 650M (pre),tomato,exon
-0.7750575542449951,NTv3 650M (post),tomato,exon
-0.1691933125257492,NTv2 500M,tomato,splice acceptor
-0.125656172633171,BPNet arch. 6M,tomato,splice acceptor
-0.4359458982944488,Residual CNN 44M,tomato,splice acceptor
-0.744257926940918,PlantCAD2 88M,tomato,splice acceptor
-0.3791649639606476,Evo2 1B,tomato,splice acceptor
-0.6623862385749817,NTv3 8M (pre),tomato,splice acceptor
-0.6843105554580688,NTv3 100M (pre),tomato,splice acceptor
-0.7641868591308594,NTv3 650M (pre),tomato,splice acceptor
-0.7584431767463684,NTv3 650M (post),tomato,splice acceptor
-0.132934883236885,NTv2 500M,tomato,start codon
-0.0,BPNet arch. 6M,tomato,start codon
-0.088478960096836,Residual CNN 44M,tomato,start codon
-0.2019559442996978,PlantCAD2 88M,tomato,start codon
-0.1622217148542404,Evo2 1B,tomato,start codon
-0.2966536581516266,NTv3 8M (pre),tomato,start codon
-0.3968957066535949,NTv3 100M (pre),tomato,start codon
-0.4830105900764465,NTv3 650M (pre),tomato,start codon
-0.5007501244544983,NTv3 650M (post),tomato,start codon
-0.6770024299621582,NTv2 500M,tomato,intron
-0.2927957773208618,BPNet arch. 6M,tomato,intron
-0.1383400112390518,Residual CNN 44M,tomato,intron
-0.1383400112390518,PlantCAD2 88M,tomato,intron
-0.5751976370811462,NTv2 500M,tomato,exon
-0.3057552278041839,BPNet arch. 6M,tomato,exon
-0.168193981051445,NTv2 500M,tomato,splice acceptor
-0.0,BPNet arch. 6M,tomato,splice acceptor
-0.4833243191242218,Residual CNN 44M,tomato,splice acceptor
-0.4833243191242218,PlantCAD2 88M,tomato,splice acceptor
-0.1586925536394119,NTv2 500M,tomato,start codon
-0.0,BPNet arch. 6M,tomato,start codon
-0.1107296794652938,Residual CNN 44M,tomato,start codon
-0.1107296794652938,PlantCAD2 88M,tomato,start codon
-0.3502058088779449,BPNet arch. 6M,tomato,intron
-0.5514466166496277,Residual CNN 44M,tomato,intron
-0.5514466166496277,PlantCAD2 88M,tomato,intron
-0.3020758032798767,BPNet arch. 6M,tomato,exon
-0.4746756553649902,Residual CNN 44M,tomato,exon
-0.4746756553649902,PlantCAD2 88M,tomato,exon
-0.0,BPNet arch. 6M,tomato,splice acceptor
-0.3391502797603607,Residual CNN 44M,tomato,splice acceptor
-0.3391502797603607,PlantCAD2 88M,tomato,splice acceptor
-0.0639578104019165,BPNet arch. 6M,tomato,start codon
-0.0914037525653839,Residual CNN 44M,tomato,start codon
-0.0914037525653839,PlantCAD2 88M,tomato,start codon
-0.1995969861745834,NTv2 500M,human,intron
-0.0296161584556102,BPNet arch. 6M,human,intron
-0.2347834408283233,Residual CNN 44M,human,intron
-0.33451908826828,HyenaDNA 7M,human,intron
-0.4144788980484009,Caduceus 7M,human,intron
-0.0,Evo2 1B,human,intron
-0.4695742726325989,NTv3 8M (pre),human,intron
-0.475054919719696,NTv3 100M (pre),human,intron
-0.5504136681556702,NTv3 650M (pre),human,intron
-0.5643875002861023,NTv3 650M (post),human,intron
-0.1995969861745834,NTv2 500M,human,intron
-0.2706590592861175,BPNet arch. 6M,human,exon
-0.2678671479225158,Residual CNN 44M,human,exon
-0.179698497056961,HyenaDNA 7M,human,exon
-0.5098947286605835,Caduceus 7M,human,exon
-0.4510694444179535,Evo2 1B,human,exon
-0.6089931726455688,NTv3 8M (pre),human,exon
-0.6492856740951538,NTv3 100M (pre),human,exon
-0.6975767016410828,NTv3 650M (pre),human,exon
-0.6822624206542969,NTv3 650M (post),human,exon
-0.1493269056081771,NTv2 500M,human,splice acceptor
-0.3807527124881744,BPNet arch. 6M,human,splice acceptor
-0.6632664203643799,Residual CNN 44M,human,splice acceptor
-0.1002769619226455,HyenaDNA 7M,human,splice acceptor
-0.7357247471809387,Caduceus 7M,human,splice acceptor
-0.1821079105138778,Evo2 1B,human,splice acceptor
-0.7726271748542786,NTv3 8M (pre),human,splice acceptor
-0.77947598695755,NTv3 100M (pre),human,splice acceptor
-0.8028115034103394,NTv3 650M (pre),human,splice acceptor
-0.7979229092597961,NTv3 650M (post),human,splice acceptor
-0.139576569199562,NTv2 500M,human,start codon
-0.1334401220083236,BPNet arch. 6M,human,start codon
-0.3876807987689972,Residual CNN 44M,human,start codon
-0.1003016158938407,HyenaDNA 7M,human,start codon
-0.3958532512187958,Caduceus 7M,human,start codon
-0.1399599611759185,Evo2 1B,human,start codon
-0.540923535823822,NTv3 8M (pre),human,start codon
-0.5464004278182983,NTv3 100M (pre),human,start codon
-0.6803378462791443,NTv3 650M (pre),human,start codon
-0.7310947179794312,NTv3 650M (post),human,start codon
-0.7310947179794312,NTv2 500M,human,start codon
-0.0172978900372982,BPNet arch. 6M,human,intron
-0.2740728259086609,Residual CNN 44M,human,intron
-0.3312098085880279,HyenaDNA 7M,human,intron
-0.5108950138092041,Caduceus 7M,human,intron
-0.5034915208816528,NTv3 8M (pre),human,intron
-0.5154411792755127,NTv3 100M (pre),human,intron
-0.5814740061759949,NTv3 650M (pre),human,intron
-0.5920455455780029,NTv3 650M (post),human,intron
-0.5920455455780029,NTv2 500M,human,intron
-0.2252149283885955,BPNet arch. 6M,human,exon
-0.4010578095912933,Residual CNN 44M,human,exon
-0.1851459741592407,HyenaDNA 7M,human,exon
-0.4599409103393554,Caduceus 7M,human,exon
-0.5931490063667297,NTv3 8M (pre),human,exon
-0.6058318018913269,NTv3 100M (pre),human,exon
-0.6738048791885376,NTv3 650M (pre),human,exon
-0.6738048791885376,NTv3 650M (post),human,exon
-0.6738048791885376,NTv2 500M,human,exon
-0.3751010596752167,BPNet arch. 6M,human,splice acceptor
-0.681228756904602,Residual CNN 44M,human,splice acceptor
-0.0252278540283441,HyenaDNA 7M,human,splice acceptor
-0.7485092878341675,Caduceus 7M,human,splice acceptor
-0.7772909998893738,NTv3 8M (pre),human,splice acceptor
-0.794090747833252,NTv3 100M (pre),human,splice acceptor
-0.8239933252334595,NTv3 650M (pre),human,splice acceptor
-0.804115891456604,NTv3 650M (post),human,splice acceptor
-0.804115891456604,NTv2 500M,human,splice acceptor
-0.0,BPNet arch. 6M,human,start codon
-0.3292546272277832,Residual CNN 44M,human,start codon
-0.0647941380739212,HyenaDNA 7M,human,start codon
-0.4505241215229034,Caduceus 7M,human,start codon
-0.60422682762146,NTv3 8M (pre),human,start codon
-0.6015576124191284,NTv3 100M (pre),human,start codon
-0.6452956795692444,NTv3 650M (pre),human,start codon
-0.6761345267295837,NTv3 650M (post),human,start codon
-0.0185965970158576,BPNet arch. 6M,human,intron
-0.2623045742511749,Residual CNN 44M,human,intron
-0.2623045742511749,HyenaDNA 7M,human,intron
-0.2623045742511749,Caduceus 7M,human,intron
-0.4804849028587341,NTv3 8M (pre),human,intron
-0.482195496559143,NTv3 100M (pre),human,intron
-0.5425574779510498,NTv3 650M (pre),human,intron
-0.5443048477172852,NTv3 650M (post),human,intron
-0.2360571771860122,BPNet arch. 6M,human,exon
-0.2360571771860122,Residual CNN 44M,human,exon
-0.2360571771860122,HyenaDNA 7M,human,exon
-0.2360571771860122,Caduceus 7M,human,exon
-0.6339762210845947,NTv3 8M (pre),human,exon
-0.6433913111686707,NTv3 100M (pre),human,exon
-0.6518793702125549,NTv3 650M (pre),human,exon
-0.6812491416931152,NTv3 650M (post),human,exon
-0.3842235207557678,BPNet arch. 6M,human,splice acceptor
-0.6810190081596375,Residual CNN 44M,human,splice acceptor
-0.6810190081596375,HyenaDNA 7M,human,splice acceptor
-0.6810190081596375,Caduceus 7M,human,splice acceptor
-0.7796080708503723,NTv3 8M (pre),human,splice acceptor
-0.7596970200538635,NTv3 100M (pre),human,splice acceptor
-0.7915040850639343,NTv3 650M (pre),human,splice acceptor
-0.7957100868225098,NTv3 650M (post),human,splice acceptor
-0.1114460304379463,BPNet arch. 6M,human,start codon
-0.3342535495758056,Residual CNN 44M,human,start codon
-0.3342535495758056,HyenaDNA 7M,human,start codon
-0.3342535495758056,Caduceus 7M,human,start codon
-0.5167152881622314,NTv3 8M (pre),human,start codon
-0.5340564250946045,NTv3 100M (pre),human,start codon
-0.6148532032966614,NTv3 650M (pre),human,start codon
-0.6582212448120117,NTv3 650M (post),human,start codon

+MCC,model_name,running_time_hours,species,datasets
+0.334637850522995,NTv2 500M,88.0,cattle,intron
+0.1238768473267555,BPNet arch. 6M,4.0,cattle,intron
+0.383470207452774,Residual CNN 44M,19.0,cattle,intron
+0.3828243613243103,HyenaDNA 7M,23.0,cattle,intron
+0.4733810424804687,Caduceus 7M,32.0,cattle,intron
+0.4315277338027954,Evo2 1B,43.0,cattle,intron
+0.5455867648124695,NTv3 8M (pre),1.0,cattle,intron
+0.5453664064407349,NTv3 100M (pre),2.0,cattle,intron
+0.5628412365913391,NTv3 650M (pre),5.0,cattle,intron
+0.5682631134986877,NTv3 650M (post),7.0,cattle,intron
+0.3689357042312622,NTv2 500M,87.0,cattle,exon
+0.3250860869884491,BPNet arch. 6M,4.0,cattle,exon
+0.4674676060676574,Residual CNN 44M,19.0,cattle,exon
+0.2207767516374588,HyenaDNA 7M,21.0,cattle,exon
+0.4960922300815582,Caduceus 7M,32.0,cattle,exon
+0.4969632029533386,Evo2 1B,44.0,cattle,exon
+0.5432836413383484,NTv3 8M (pre),1.0,cattle,exon
+0.5531933307647705,NTv3 100M (pre),2.0,cattle,exon
+0.591151773929596,NTv3 650M (pre),5.0,cattle,exon
+0.6253225207328796,NTv3 650M (post),7.0,cattle,exon
+0.118808165192604,NTv2 500M,86.0,cattle,splice acceptor
+0.4715546369552612,BPNet arch. 6M,4.0,cattle,splice acceptor
+0.6620649099349976,Residual CNN 44M,19.0,cattle,splice acceptor
+0.104436807334423,HyenaDNA 7M,22.0,cattle,splice acceptor
+0.7064619660377502,Caduceus 7M,30.0,cattle,splice acceptor
+0.2085049450397491,Evo2 1B,43.0,cattle,splice acceptor
+0.7254849076271057,NTv3 8M (pre),1.0,cattle,splice acceptor
+0.7404072880744934,NTv3 100M (pre),2.0,cattle,splice acceptor
+0.7732946872711182,NTv3 650M (pre),5.0,cattle,splice acceptor
+0.7679624557495117,NTv3 650M (post),7.0,cattle,splice acceptor
+0.1412438601255417,NTv2 500M,88.0,cattle,start codon
+0.1490814685821533,BPNet arch. 6M,4.0,cattle,start codon
+0.3243320286273956,Residual CNN 44M,19.0,cattle,start codon
+0.056509330868721,HyenaDNA 7M,23.0,cattle,start codon
+0.3455557227134704,Caduceus 7M,33.0,cattle,start codon
+0.1030694246292114,Evo2 1B,43.0,cattle,start codon
+0.5275959968566895,NTv3 8M (pre),1.0,cattle,start codon
+0.4962065815925598,NTv3 100M (pre),2.0,cattle,start codon
+0.5591813921928406,NTv3 650M (pre),5.0,cattle,start codon
+0.5492052435874939,NTv3 650M (post),7.0,cattle,start codon
+0.0383123345673084,NTv2 500M,90.0,cattle,intron
+0.1015273928642273,BPNet arch. 6M,7.0,cattle,intron
+0.3299930691719055,Residual CNN 44M,23.0,cattle,intron
+0.3826011121273041,HyenaDNA 7M,20.0,cattle,intron
+0.5564854741096497,Caduceus 7M,36.0,cattle,intron
+0.5645747780799866,NTv3 8M (pre),2.0,cattle,intron
+0.5765650272369385,NTv3 100M (pre),2.0,cattle,intron
+0.6140890121459961,NTv3 650M (pre),7.0,cattle,intron
+0.6709504723548889,NTv3 650M (post),10.0,cattle,intron
+0.3665516376495361,NTv2 500M,88.0,cattle,exon
+0.323502242565155,BPNet arch. 6M,7.0,cattle,exon
+0.519285261631012,Residual CNN 44M,23.0,cattle,exon
+0.1038060635328292,HyenaDNA 7M,21.0,cattle,splice acceptor
+0.5072187781333923,Caduceus 7M,39.0,cattle,exon
+0.593974232673645,NTv3 8M (pre),1.0,cattle,exon
+0.6014777421951294,NTv3 100M (pre),2.0,cattle,exon
+0.6433462500572205,NTv3 650M (pre),7.0,cattle,exon
+0.6648420095443726,NTv3 650M (post),9.0,cattle,exon
+0.0937248468399047,NTv2 500M,89.0,cattle,splice acceptor
+0.4435675740242004,BPNet arch. 6M,7.0,cattle,splice acceptor
+0.6590774655342102,Residual CNN 44M,23.0,cattle,splice acceptor
+0.1038060635328292,HyenaDNA 7M,21.0,cattle,splice acceptor
+0.6937510371208191,Caduceus 7M,38.0,cattle,splice acceptor
+0.7248824238777161,NTv3 8M (pre),1.0,cattle,splice acceptor
+0.7345820069313049,NTv3 100M (pre),2.0,cattle,splice acceptor
+0.7439091801643372,NTv3 650M (pre),7.0,cattle,splice acceptor
+0.758992075920105,NTv3 650M (post),9.0,cattle,splice acceptor
+0.1127461418509483,NTv2 500M,88.0,cattle,start codon
+0.0901669710874557,BPNet arch. 6M,7.0,cattle,start codon
+0.3548502624034881,Residual CNN 44M,23.0,cattle,start codon
+0.0545537285506725,HyenaDNA 7M,24.0,cattle,start codon
+0.4038819670677185,Caduceus 7M,38.0,cattle,start codon
+0.5045616030693054,NTv3 8M (pre),1.0,cattle,start codon
+0.4762806594371795,NTv3 100M (pre),3.0,cattle,start codon
+0.5610686540603638,NTv3 650M (pre),7.0,cattle,start codon
+0.5782408118247986,NTv3 650M (post),9.0,cattle,start codon
+0.1547228246927261,NTv2 500M,85.0,cattle,intron
+0.0639578104019165,BPNet arch. 6M,2.0,cattle,start codon
+0.3266464471817016,Residual CNN 44M,23.0,cattle,intron
+0.4240079522132873,HyenaDNA 7M,23.0,cattle,intron
+0.4552704095840454,Caduceus 7M,37.0,cattle,intron
+0.5063548684120178,NTv3 8M (pre),1.0,cattle,intron
+0.5619235038757324,NTv3 100M (pre),3.0,cattle,intron
+0.531277596950531,NTv3 650M (pre),7.0,cattle,intron
+0.6205132603645325,NTv3 650M (post),9.0,cattle,intron
+0.3413117229938507,NTv2 500M,87.0,cattle,exon
+0.1383400112390518,BPNet arch. 6M,6.0,cattle,intron
+0.4856111407279968,Residual CNN 44M,23.0,cattle,exon
+0.2246854901313781,HyenaDNA 7M,70.0,cattle,exon
+0.5370016098022461,Caduceus 7M,35.0,cattle,exon
+0.5721412897109985,NTv3 8M (pre),2.0,cattle,exon
+0.5819903612136841,NTv3 100M (pre),2.0,cattle,exon
+0.6183731555938721,NTv3 650M (pre),7.0,cattle,exon
+0.6233119964599609,NTv3 650M (post),9.0,cattle,exon
+0.4220209121704101,BPNet arch. 6M,7.0,cattle,splice acceptor
+0.689546525478363,Residual CNN 44M,23.0,cattle,splice acceptor
+0.1121769621968269,HyenaDNA 7M,69.0,cattle,splice acceptor
+0.7314619421958923,Caduceus 7M,37.0,cattle,splice acceptor
+0.74350905418396,NTv3 8M (pre),2.0,cattle,splice acceptor
+0.746654748916626,NTv3 100M (pre),2.0,cattle,splice acceptor
+0.7714020609855652,NTv3 650M (pre),7.0,cattle,splice acceptor
+0.7809271812438965,NTv3 650M (post),9.0,cattle,splice acceptor
+0.0930091217160224,BPNet arch. 6M,6.0,cattle,start codon
+0.423166275024414,Residual CNN 44M,23.0,cattle,start codon
+0.1253955662250518,HyenaDNA 7M,72.0,cattle,start codon
+0.33419930934906,Caduceus 7M,37.0,cattle,start codon
+0.4639334082603454,NTv3 8M (pre),1.0,cattle,start codon
+0.5102551579475403,NTv3 100M (pre),2.0,cattle,start codon
+0.5866840481758118,NTv3 650M (pre),7.0,cattle,start codon
+0.588148832321167,NTv3 650M (post),9.0,cattle,start codon
+0.4777896404266357,NTv2 500M,33.0,tomato,intron
+0.3216900527477264,BPNet arch. 6M,1.0,tomato,intron
+0.46840900182724,Residual CNN 44M,6.0,tomato,intron
+0.5251263380050659,PlantCAD2 88M,38.0,tomato,intron
+0.747674286365509,Evo2 1B,13.0,tomato,intron
+0.6858112812042236,NTv3 8M (pre),0.0,tomato,intron
+0.7038365006446838,NTv3 100M (pre),0.0,tomato,intron
+0.7481895685195923,NTv3 650M (pre),1.0,tomato,intron
+0.7458349466323853,NTv3 650M (post),2.0,tomato,intron
+0.6147475838661194,NTv2 500M,33.0,tomato,exon
+0.4551227986812591,BPNet arch. 6M,1.0,tomato,exon
+0.5068296194076538,Residual CNN 44M,6.0,tomato,exon
+0.7256030440330505,PlantCAD2 88M,37.0,tomato,exon
+0.7006198763847351,Evo2 1B,14.0,tomato,exon
+0.7537696361541748,NTv3 8M (pre),0.0,tomato,exon
+0.7484462857246399,NTv3 100M (pre),0.0,tomato,exon
+0.764011561870575,NTv3 650M (pre),1.0,tomato,exon
+0.7750575542449951,NTv3 650M (post),2.0,tomato,exon
+0.1691933125257492,NTv2 500M,33.0,tomato,splice acceptor
+0.125656172633171,BPNet arch. 6M,1.0,tomato,splice acceptor
+0.4359458982944488,Residual CNN 44M,6.0,tomato,splice acceptor
+0.744257926940918,PlantCAD2 88M,38.0,tomato,splice acceptor
+0.3791649639606476,Evo2 1B,13.0,tomato,splice acceptor
+0.6623862385749817,NTv3 8M (pre),0.0,tomato,splice acceptor
+0.6843105554580688,NTv3 100M (pre),0.0,tomato,splice acceptor
+0.7641868591308594,NTv3 650M (pre),1.0,tomato,splice acceptor
+0.7584431767463684,NTv3 650M (post),2.0,tomato,splice acceptor
+0.132934883236885,NTv2 500M,34.0,tomato,start codon
+0.0,BPNet arch. 6M,1.0,tomato,start codon
+0.088478960096836,Residual CNN 44M,6.0,tomato,start codon
+0.2019559442996978,PlantCAD2 88M,38.0,tomato,start codon
+0.1622217148542404,Evo2 1B,13.0,tomato,start codon
+0.2966536581516266,NTv3 8M (pre),0.0,tomato,start codon
+0.3968957066535949,NTv3 100M (pre),0.0,tomato,start codon
+0.4830105900764465,NTv3 650M (pre),1.0,tomato,start codon
+0.5007501244544983,NTv3 650M (post),2.0,tomato,start codon
+0.6770024299621582,NTv2 500M,33.0,tomato,intron
+0.2927957773208618,BPNet arch. 6M,2.0,tomato,intron
+0.1383400112390518,Residual CNN 44M,6.0,tomato,intron
+0.7252154350280762,PlantCAD2 88M,46.0,tomato,intron
+0.712181031703949,NTv3 8M (pre),1.0,tomato,intron
+0.7515084147453308,NTv3 100M (pre),1.0,tomato,intron
+0.7400797009468079,NTv3 650M (pre),3.0,tomato,intron
+0.7532288432121277,NTv3 650M (post),4.0,tomato,intron
+0.5751976370811462,NTv2 500M,33.0,tomato,exon
+0.3057552278041839,BPNet arch. 6M,3.0,tomato,exon
+0.7699167728424072,PlantCAD2 88M,50.0,tomato,exon
+0.748009443283081,NTv3 8M (pre),1.0,tomato,exon
+0.7629056572914124,NTv3 100M (pre),1.0,tomato,exon
+0.7755228877067566,NTv3 650M (pre),3.0,tomato,exon
+0.782516598701477,NTv3 650M (post),4.0,tomato,exon
+0.168193981051445,NTv2 500M,33.0,tomato,splice acceptor
+0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
+0.4833243191242218,Residual CNN 44M,8.0,tomato,splice acceptor
+0.7335307598114014,PlantCAD2 88M,46.0,tomato,splice acceptor
+0.6908777952194214,NTv3 8M (pre),1.0,tomato,splice acceptor
+0.7348777055740356,NTv3 100M (pre),1.0,tomato,splice acceptor
+0.7484620809555054,NTv3 650M (pre),3.0,tomato,splice acceptor
+0.7539154291152954,NTv3 650M (post),4.0,tomato,splice acceptor
+0.1586925536394119,NTv2 500M,33.0,tomato,start codon
+0.0,BPNet arch. 6M,2.0,tomato,start codon
+0.1107296794652938,Residual CNN 44M,8.0,tomato,start codon
+0.3756755590438843,PlantCAD2 88M,48.0,tomato,start codon
+0.4113904237747192,NTv3 8M (pre),1.0,tomato,start codon
+0.4541433155536651,NTv3 100M (pre),1.0,tomato,start codon
+0.5002310872077942,NTv3 650M (pre),3.0,tomato,start codon
+0.5470007658004761,NTv3 650M (post),4.0,tomato,start codon
+0.6712294220924377,NTv2 500M,33.0,tomato,intron
+0.3502058088779449,BPNet arch. 6M,2.0,tomato,intron
+0.5514466166496277,Residual CNN 44M,8.0,tomato,intron
+0.722817599773407,PlantCAD2 88M,88.0,tomato,intron
+0.7013162970542908,NTv3 8M (pre),1.0,tomato,intron
+0.747364342212677,NTv3 100M (pre),1.0,tomato,intron
+0.752423107624054,NTv3 650M (pre),3.0,tomato,intron
+0.7750566005706787,NTv3 650M (post),4.0,tomato,intron
+0.6022632718086243,NTv2 500M,33.0,tomato,exon
+0.3020758032798767,BPNet arch. 6M,2.0,tomato,exon
+0.4746756553649902,Residual CNN 44M,8.0,tomato,exon
+0.7354215979576111,PlantCAD2 88M,45.0,tomato,exon
+0.7157281041145325,NTv3 8M (pre),1.0,tomato,exon
+0.7326820492744446,NTv3 100M (pre),1.0,tomato,exon
+0.7308483123779297,NTv3 650M (pre),3.0,tomato,exon
+0.7417197823524475,NTv3 650M (post),4.0,tomato,exon
+0.1558358669281005,NTv2 500M,33.0,tomato,splice acceptor
+0.0,BPNet arch. 6M,2.0,tomato,splice acceptor
+0.3391502797603607,Residual CNN 44M,8.0,tomato,splice acceptor
+0.7305923700332642,PlantCAD2 88M,85.0,tomato,splice acceptor
+0.6977006196975708,NTv3 8M (pre),1.0,tomato,splice acceptor
+0.6770275831222534,NTv3 100M (pre),1.0,tomato,splice acceptor
+0.6770390272140503,NTv3 650M (pre),3.0,tomato,splice acceptor
+0.7287323474884033,NTv3 650M (post),4.0,tomato,splice acceptor
+0.1887903958559036,NTv2 500M,33.0,tomato,start codon
+0.0639578104019165,BPNet arch. 6M,2.0,tomato,start codon
+0.0914037525653839,Residual CNN 44M,8.0,tomato,start codon
+0.4881043434143066,PlantCAD2 88M,88.0,tomato,start codon
+0.4309621453285217,NTv3 8M (pre),1.0,tomato,start codon
+0.4028272926807403,NTv3 100M (pre),1.0,tomato,start codon
+0.4060510396957397,NTv3 650M (pre),3.0,tomato,start codon
+0.472331553697586,NTv3 650M (post),4.0,tomato,start codon
+0.1995969861745834,NTv2 500M,72.0,human,intron
+0.0296161584556102,BPNet arch. 6M,3.0,human,intron
+0.2347834408283233,Residual CNN 44M,15.0,human,intron
+0.33451908826828,HyenaDNA 7M,17.0,human,intron
+0.4144788980484009,Caduceus 7M,27.0,human,intron
+0.0,Evo2 1B,34.0,human,intron
+0.4695742726325989,NTv3 8M (pre),1.0,human,intron
+0.475054919719696,NTv3 100M (pre),2.0,human,intron
+0.5504136681556702,NTv3 650M (pre),5.0,human,intron
+0.5643875002861023,NTv3 650M (post),6.0,human,intron
+0.1995969861745834,NTv2 500M,72.0,human,intron
+0.2706590592861175,BPNet arch. 6M,3.0,human,exon
+0.2678671479225158,Residual CNN 44M,15.0,human,exon
+0.179698497056961,HyenaDNA 7M,19.0,human,exon
+0.5098947286605835,Caduceus 7M,26.0,human,exon
+0.4510694444179535,Evo2 1B,34.0,human,exon
+0.6089931726455688,NTv3 8M (pre),1.0,human,exon
+0.6492856740951538,NTv3 100M (pre),2.0,human,exon
+0.6975767016410828,NTv3 650M (pre),5.0,human,exon
+0.6822624206542969,NTv3 650M (post),8.0,human,exon
+0.1493269056081771,NTv2 500M,73.0,human,splice acceptor
+0.3807527124881744,BPNet arch. 6M,3.0,human,splice acceptor
+0.6632664203643799,Residual CNN 44M,15.0,human,splice acceptor
+0.1002769619226455,HyenaDNA 7M,17.0,human,splice acceptor
+0.7357247471809387,Caduceus 7M,24.0,human,splice acceptor
+0.1821079105138778,Evo2 1B,34.0,human,splice acceptor
+0.7726271748542786,NTv3 8M (pre),1.0,human,splice acceptor
+0.77947598695755,NTv3 100M (pre),2.0,human,splice acceptor
+0.8028115034103394,NTv3 650M (pre),5.0,human,splice acceptor
+0.7979229092597961,NTv3 650M (post),7.0,human,splice acceptor
+0.139576569199562,NTv2 500M,73.0,human,start codon
+0.1334401220083236,BPNet arch. 6M,3.0,human,start codon
+0.3876807987689972,Residual CNN 44M,15.0,human,start codon
+0.1003016158938407,HyenaDNA 7M,18.0,human,start codon
+0.3958532512187958,Caduceus 7M,24.0,human,start codon
+0.1399599611759185,Evo2 1B,34.0,human,start codon
+0.540923535823822,NTv3 8M (pre),1.0,human,start codon
+0.5464004278182983,NTv3 100M (pre),2.0,human,start codon
+0.6803378462791443,NTv3 650M (pre),5.0,human,start codon
+0.7310947179794312,NTv3 650M (post),7.0,human,start codon
+0.0814515128731727,NTv2 500M,72.0,human,intron
+0.0172978900372982,BPNet arch. 6M,5.0,human,intron
+0.2740728259086609,Residual CNN 44M,19.0,human,intron
+0.3312098085880279,HyenaDNA 7M,21.0,human,intron
+0.5108950138092041,Caduceus 7M,33.0,human,intron
+0.5034915208816528,NTv3 8M (pre),1.0,human,intron
+0.5154411792755127,NTv3 100M (pre),2.0,human,intron
+0.5814740061759949,NTv3 650M (pre),6.0,human,intron
+0.5920455455780029,NTv3 650M (post),8.0,human,intron
+0.3505669236183166,NTv2 500M,72.0,human,exon
+0.2252149283885955,BPNet arch. 6M,5.0,human,exon
+0.4010578095912933,Residual CNN 44M,18.0,human,exon
+0.1851459741592407,HyenaDNA 7M,18.0,human,exon
+0.4599409103393554,Caduceus 7M,33.0,human,exon
+0.5931490063667297,NTv3 8M (pre),1.0,human,exon
+0.6058318018913269,NTv3 100M (pre),2.0,human,exon
+0.6738048791885376,NTv3 650M (pre),6.0,human,exon
+0.6738048791885376,NTv3 650M (post),6.0,human,exon
+0.1533636748790741,NTv2 500M,72.0,human,splice acceptor
+0.3751010596752167,BPNet arch. 6M,5.0,human,splice acceptor
+0.681228756904602,Residual CNN 44M,19.0,human,splice acceptor
+0.0252278540283441,HyenaDNA 7M,22.0,human,splice acceptor
+0.7485092878341675,Caduceus 7M,35.0,human,splice acceptor
+0.7772909998893738,NTv3 8M (pre),1.0,human,splice acceptor
+0.794090747833252,NTv3 100M (pre),2.0,human,splice acceptor
+0.8239933252334595,NTv3 650M (pre),6.0,human,splice acceptor
+0.804115891456604,NTv3 650M (post),8.0,human,splice acceptor
+0.0851806029677391,NTv2 500M,72.0,human,start codon
+0.0,BPNet arch. 6M,5.0,human,start codon
+0.3292546272277832,Residual CNN 44M,19.0,human,start codon
+0.0647941380739212,HyenaDNA 7M,20.0,human,start codon
+0.4505241215229034,Caduceus 7M,33.0,human,start codon
+0.60422682762146,NTv3 8M (pre),1.0,human,start codon
+0.6015576124191284,NTv3 100M (pre),2.0,human,start codon
+0.6452956795692444,NTv3 650M (pre),6.0,human,start codon
+0.6761345267295837,NTv3 650M (post),8.0,human,start codon
+0.0558800511062145,NTv2 500M,70.0,human,intron
+0.0185965970158576,BPNet arch. 6M,5.0,human,intron
+0.2623045742511749,Residual CNN 44M,18.0,human,intron
+0.3633092641830444,HyenaDNA 7M,68.0,human,intron
+0.4261827170848846,Caduceus 7M,29.0,human,intron
+0.4804849028587341,NTv3 8M (pre),1.0,human,intron
+0.482195496559143,NTv3 100M (pre),2.0,human,intron
+0.5425574779510498,NTv3 650M (pre),6.0,human,intron
+0.5443048477172852,NTv3 650M (post),8.0,human,intron
+0.3958893716335296,NTv2 500M,71.0,human,exon
+0.2360571771860122,BPNet arch. 6M,5.0,human,exon
+0.2360571771860122,Residual CNN 44M,5.0,human,exon
+0.1936572045087814,HyenaDNA 7M,68.0,human,exon
+0.5046994090080261,Caduceus 7M,29.0,human,exon
+0.6339762210845947,NTv3 8M (pre),1.0,human,exon
+0.6433913111686707,NTv3 100M (pre),2.0,human,exon
+0.6518793702125549,NTv3 650M (pre),6.0,human,exon
+0.6812491416931152,NTv3 650M (post),8.0,human,exon
+0.1248077526688575,NTv2 500M,70.0,human,splice acceptor
+0.3842235207557678,BPNet arch. 6M,5.0,human,splice acceptor
+0.6810190081596375,Residual CNN 44M,18.0,human,splice acceptor
+0.0527583621442317,HyenaDNA 7M,17.0,human,splice acceptor
+0.7072214484214783,Caduceus 7M,29.0,human,splice acceptor
+0.7796080708503723,NTv3 8M (pre),1.0,human,splice acceptor
+0.7596970200538635,NTv3 100M (pre),2.0,human,splice acceptor
+0.7915040850639343,NTv3 650M (pre),6.0,human,splice acceptor
+0.7957100868225098,NTv3 650M (post),8.0,human,splice acceptor
+0.1267423331737518,NTv2 500M,70.0,human,start codon
+0.1114460304379463,BPNet arch. 6M,5.0,human,start codon
+0.3342535495758056,Residual CNN 44M,18.0,human,start codon
+0.1215013489127159,HyenaDNA 7M,18.0,human,start codon
+0.4082835018634796,Caduceus 7M,29.0,human,start codon
+0.5167152881622314,NTv3 8M (pre),1.0,human,start codon
+0.5340564250946045,NTv3 100M (pre),2.0,human,start codon
+0.6148532032966614,NTv3 650M (pre),6.0,human,start codon
+0.6582212448120117,NTv3 650M (post),8.0,human,start codon

data/bigwig_dataset.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

src/streamlit_app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import streamlit as st
 import plotly.express as px
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
 # ---------------------------------------------------------------------
 # Page config (must be the first Streamlit command)
@@ -68,6 +69,24 @@ MODEL_COLORS = {
     "Caduceus 7M": COLORS["purple_2"]
 }
 MODEL_NAMES = list(MODEL_COLORS.keys())
 PLANT_SPECIES = ["tomato", "rice", "maize", "arabidopsis"]
@@ -81,12 +100,25 @@ SPECIES_GROUPS = {
 _LAST_UPDATED = "Dec 10, 2025"
 _INTRO = """
-Benchmark across gene annotation and functionnal tracks.
-- **Pearson correlations (multi-assay)**: per-dataset scores across species and models.
-- **MCC (bed tracks)**: per-track MCC values across species and models.
-These tasks measure the model's ability the generalize to unseen tracks, species and assay types.
 """
 HERE = os.path.dirname(os.path.abspath(__file__))  # /app/src
@@ -111,6 +143,9 @@ def load_raw_data():
     return pearson_df, mcc_df
 @st.cache_data
 def load_expanded_data():
@@ -129,6 +164,12 @@ def load_expanded_data():
       we average their Score.
     """
     pearson_df, mcc_df = load_raw_data()
     # --- Pearson correlations ---
     # Expect columns: species, assay_type, datasets, model_name, pearson correlation
@@ -143,10 +184,14 @@ def load_expanded_data():
     if "assay_type" in pearson_df.columns:
         pearson_group_cols.append("assay_type")
     pearson_df = (
         pearson_df
-        .groupby(pearson_group_cols, as_index=False, dropna=False)["Score"]
-        .mean()
     )
     # --- MCC (bed tracks) ---
@@ -160,10 +205,14 @@ def load_expanded_data():
     # Collapse duplicates with same (species, datasets, Model)
     mcc_group_cols = ["species", "datasets", "Model"]
     mcc_df = (
         mcc_df
-        .groupby(mcc_group_cols, as_index=False, dropna=False)["Score"]
-        .mean()
     )
     # Optional sanity checks
@@ -255,7 +304,7 @@ def build_leaderboard(
     )
     if df.empty:
-        return pd.DataFrame(columns=["Model", "Num entries", "Mean score"])
     agg = (
         df.groupby("Model")["Score"]
@@ -269,15 +318,22 @@ def build_leaderboard(
         df.groupby("Model")["Score"].count().reindex(agg["Model"]).values
     )
     agg = agg.sort_values("Mean score", ascending=False).reset_index(drop=True)
-    agg = agg[["Model", "Num entries", "Mean score"]]
     # Ensure the index starts with 1
     agg.index += 1
     return agg
 def build_bar_df(
     benchmark_name: str,
     selected_species: List[str],
@@ -587,6 +643,39 @@ def plot_radar(
     return fig
 # ---------------------------------------------------------------------
 # UI helpers
@@ -604,12 +693,9 @@ def sidebar_toggle(label: str, value: bool = False, key: str | None = None) -> b
     # Fallback for older Streamlit versions
     return st.sidebar.checkbox(label, value=value, key=key)
 # ---------------------------------------------------------------------
 # Streamlit UI
 # ---------------------------------------------------------------------
 def main():
     st.title("🧬 NTv3 Benchmark")
     st.markdown(_INTRO)
@@ -653,7 +739,6 @@ def main():
     if not available_species:
         st.sidebar.info(f"No {species_group.lower()} species available for this benchmark.")
     # Assay toggles (Pearson only), based on filtered species
     if cfg.get("has_assay_type", False):
         st.sidebar.subheader("Assay types")
@@ -673,7 +758,6 @@ def main():
     else:
         selected_assays = []
     # Bed track / dataset toggles (MCC only), based on species selection
     selected_datasets: List[str] = []
     if benchmark_name == "Genome Annotation":
@@ -707,8 +791,8 @@ def main():
     col1, col2 = st.columns([1, 1])
     with col1:
-        st.subheader("🏅 Leaderboard (per model)")
-        st.write("\n") # 👈 spacer to match plotly padding
         st.write("\n")
         st.write("\n")
         if leaderboard_df.empty:
@@ -716,7 +800,6 @@ def main():
         else:
             st.dataframe(leaderboard_df, use_container_width=True)
     with col2:
         st.subheader("📈 Mean score per model")
         if bar_df.empty:
@@ -724,7 +807,6 @@ def main():
         else:
             # Order models by performance (least -> most)
             bar_df = bar_df.sort_values("Mean score", ascending=True)
             model_order = bar_df["Model"].tolist()
             fig = px.bar(
@@ -733,30 +815,28 @@ def main():
                 y="Mean score",
                 color="Model",
                 color_discrete_map=MODEL_COLORS,
-                category_orders={"Model": model_order},  # enforce ordering on x
             )
             fig.update_layout(
                 barmode="group",
-                height=500,
                 xaxis_title="",
-                yaxis_title="Mean score",
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
                 bargap=0.08,
             )
-            # Hide x-axis model names (same style as the panels)
             fig.update_xaxes(showticklabels=False)
             st.plotly_chart(fig, use_container_width=True)
     # --- Breakdown plot: assay_type (Functional Tracks) OR datasets (Genome Annotation) ---
     breakdown_df = build_category_model_df(
         benchmark_name, selected_species, selected_assays, selected_models, selected_datasets
     )
-    st.subheader("🧪 Mean score by assay type / dataset (all models)")
     if breakdown_df.empty:
         st.info("No data for the selected filters.")
     else:
@@ -767,30 +847,13 @@ def main():
         )
         st.plotly_chart(fig_breakdown, use_container_width=True)
-    st.subheader("🕸️ Performance by assay type / dataset (radar)")
-    radar_df = build_radar_df(
-        benchmark_name,
-        selected_species,
-        selected_assays,
-        selected_models,
-        selected_datasets,
-    )
-    if radar_df.empty:
-        st.info("No data for the selected filters.")
-    else:
-        fig_radar = plot_radar(
-            radar_df,
-            metric_label=cfg["metric_label"],
-        )
-        st.plotly_chart(fig_radar, use_container_width=True)
-    st.subheader("⚖️ Model comparison")
     left, right = st.columns([1, 1], gap="large")
     with left:
-        st.markdown("#### Head-to-head (per track)")
         model_picker_options = selected_models if selected_models else _ALL_MODELS
         default_a = model_picker_options[0] if model_picker_options else _ALL_MODELS[0]
@@ -839,7 +902,6 @@ def main():
                 if c in scatter_df.columns:
                     hover_cols.append(c)
-            # Model A on Y, Model B on X
             fig_scatter = px.scatter(
                 scatter_df,
                 x="Model B",
@@ -848,7 +910,6 @@ def main():
                 hover_data=hover_cols,
             )
-            # Red diagonal y=x
             fig_scatter.add_shape(
                 type="line",
                 x0=axis_range[0], y0=axis_range[0],
@@ -857,7 +918,6 @@ def main():
                 line=dict(color="red", dash="dot", width=2),
             )
-            # Square + identical scale/ticks (works even with use_container_width=True)
             fig_scatter.update_layout(
                 height=550,
                 margin=dict(l=60, r=20, t=20, b=60),
@@ -871,7 +931,7 @@ def main():
                     title=f"{model_a} — {cfg['metric_label']}",
                     range=axis_range,
                     dtick=tick_step,
-                    scaleanchor="x",   # lock y to x
                     scaleratio=1,
                     constrain="domain",
                 ),
@@ -882,9 +942,9 @@ def main():
             st.plotly_chart(fig_scatter, use_container_width=True)
     with right:
-        st.markdown("#### All models (distribution across tracks)")
-        violin_df = build_violin_df(
             benchmark_name,
             selected_species,
             selected_assays,
@@ -892,49 +952,87 @@ def main():
             selected_datasets,
         )
-        if violin_df.empty:
-            st.info("No data for the selected filters.")
         else:
-                # Order models by median performance (least -> most)
-            model_order = (
-                violin_df
-                .groupby("Model")["Score"]
-                .median()
-                .sort_values(ascending=True)
-                .index
-                .tolist()
-            )
-            fig_violin = px.violin(
-                violin_df,
-                x="Model",
-                y="Score",
-                color="Model",
-                color_discrete_map=MODEL_COLORS,
-                box=True,            # keep inner boxplot
-                points=False,        # 👈 remove all dots
-                category_orders={"Model": model_order},  # 👈 enforce ordering
             )
-            fig_violin.update_layout(
-                height=650,
-                xaxis_title="",
-                yaxis_title=cfg["metric_label"],
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
-                showlegend=False,
             )
-            fig_violin.update_traces(
-                box_visible=True,
-                meanline_visible=False,
             )
-            # Optional: hide model names if you prefer a cleaner look
-            # fig_violin.update_xaxes(showticklabels=False)
-            st.plotly_chart(fig_violin, use_container_width=True)
 if __name__ == "__main__":

 import plotly.express as px
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
+import numpy as np
 # ---------------------------------------------------------------------
 # Page config (must be the first Streamlit command)
     "Caduceus 7M": COLORS["purple_2"]
 }
+# Training-regime label for each model display name. The leaderboard code maps
+# its "Model" column through this dict and labels any model that is missing
+# here as "UNKNOWN".
+# NOTE(review): the labels presumably stand for post-trained / pre-trained /
+# trained from scratch -- confirm with the benchmark authors.
+MODEL_TRAINING_STATUS = {
+    "NTv3 650M (post)": "POST",
+    "NTv3 650M (pre)": "PRE",
+    "NTv3 100M (pre)": "PRE",
+    "NTv3 8M (pre)": "PRE",
+    "Residual CNN 44M": "SCRATCH",
+    "Caduceus 7M": "PRE",
+    "Evo2 1B": "PRE",
+    "NTv2 500M": "PRE",
+    "BPNet arch. 6M": "SCRATCH",
+    "PlantCAD2 88M": "PRE",
+}
+# Per-model factor that build_convergence_df multiplies into the averaged
+# "GPU hours" value; models not listed here fall back to a factor of 1.
+MODEL_GPU_MULTIPLIER = {
+    "Evo2 1B": 8,   # trained on 8 GPUs
+}
 MODEL_NAMES = list(MODEL_COLORS.keys())
 PLANT_SPECIES = ["tomato", "rice", "maize", "arabidopsis"]
 _LAST_UPDATED = "Dec 10, 2025"
 _INTRO = """
+The **NTv3 Benchmark** is a curated benchmark of 106 long-range genomic datasets
+designed to evaluate models under realistic 32 kb input, single-base-pair output settings.
+The dataset spans two complementary task families: genome annotation (exon, intron, splice acceptor, start codon)
+and functional-regulatory prediction, which includes diverse experimental tracks such as chromatin accessibility,
+ histone modifications, transcription initiation (PRO-cap), RNA binding (eCLIP), gene expression (RNA-seq),
+ and translation (Ribo-seq).
+Data are drawn from a phylogenetically diverse set of species, including organisms seen during post-training
+(human, chicken, Arabidopsis, rice, maize) and entirely unseen species (cattle, tomato), with careful curation
+to avoid data leakage. This design allows the dataset to probe long-range sequence-to-function mapping,
+cross-species generalization, and transfer across heterogeneous regulatory modalities,
+including assays not present in prior multispecies training corpora. By standardizing sequence length,
+resolution, and evaluation metrics across all tracks, the NTv3 Benchmark provides a controlled dataset
+for comparing representation quality across genomic foundation models.
+The metrics used are:
+- **Pearson correlations (multi-assay)**: per-dataset scores across species and models for functional tracks.
+- **MCC (bed tracks)**: per-track MCC values across species and models for gene annotation tracks.
 """
 HERE = os.path.dirname(os.path.abspath(__file__))  # /app/src
     return pearson_df, mcc_df
+def _normalize_training_hours(df: pd.DataFrame) -> pd.DataFrame:
+    """Return *df* with the ``running_time_hours`` column renamed to ``GPU hours``.
+
+    ``rename`` is called without ``inplace``, so the input frame is left
+    untouched and a new frame is returned. A frame that has no
+    ``running_time_hours`` column keeps its column names unchanged, because
+    pandas ignores missing labels in ``rename`` by default.
+    """
+    return df.rename(columns={"running_time_hours": "GPU hours"})
 @st.cache_data
 def load_expanded_data():
       we average their Score.
     """
     pearson_df, mcc_df = load_raw_data()
+    pearson_df = _normalize_training_hours(pearson_df)
+    mcc_df = _normalize_training_hours(mcc_df)
+    if "track_name_clean" in pearson_df.columns:
+        pearson_df = pearson_df.drop(columns=["datasets"], errors="ignore")
+        pearson_df = pearson_df.rename(columns={"track_name_clean": "datasets"})
     # --- Pearson correlations ---
     # Expect columns: species, assay_type, datasets, model_name, pearson correlation
     if "assay_type" in pearson_df.columns:
         pearson_group_cols.append("assay_type")
+    agg_cols = {"Score": "mean"}
+    if "GPU hours" in pearson_df.columns:
+        agg_cols["GPU hours"] = "mean"
     pearson_df = (
         pearson_df
+        .groupby(pearson_group_cols, as_index=False, dropna=False)
+        .agg(agg_cols)
     )
     # --- MCC (bed tracks) ---
     # Collapse duplicates with same (species, datasets, Model)
     mcc_group_cols = ["species", "datasets", "Model"]
+    agg_cols = {"Score": "mean"}
+    if "GPU hours" in mcc_df.columns:
+        agg_cols["GPU hours"] = "mean"
     mcc_df = (
         mcc_df
+        .groupby(mcc_group_cols, as_index=False, dropna=False)
+        .agg(agg_cols)
     )
     # Optional sanity checks
     )
     if df.empty:
+        return pd.DataFrame(columns=["Model", "Model Type", "Num entries", "Mean score"])
     agg = (
         df.groupby("Model")["Score"]
         df.groupby("Model")["Score"].count().reindex(agg["Model"]).values
     )
+    # 👇 Add training regime column
+    agg["Training"] = agg["Model"].map(MODEL_TRAINING_STATUS).fillna("UNKNOWN")
+    # Sort by performance
     agg = agg.sort_values("Mean score", ascending=False).reset_index(drop=True)
+    # Column order
+    agg = agg[["Model", "Training", "Num entries", "Mean score"]]
     # Ensure the index starts with 1
     agg.index += 1
     return agg
 def build_bar_df(
     benchmark_name: str,
     selected_species: List[str],
     return fig
+def build_convergence_df(
+    benchmark_name: str,
+    selected_species: List[str],
+    selected_assays: List[str],
+    selected_models: List[str],
+    selected_datasets: List[str],
+) -> pd.DataFrame:
+    """Build the per-model table of mean score versus GPU hours.
+
+    All five arguments are forwarded unchanged to ``filter_base_df``; their
+    filtering semantics are defined there.
+
+    Returns:
+        One row per model with the columns ``Model``, ``Performance`` (mean
+        ``Score``, rounded to 3 decimals), ``GPU hours`` (mean hours times the
+        model's entry in ``MODEL_GPU_MULTIPLIER``, rounded to 1 decimal) and
+        ``GPU multiplier``. When the filtered frame is empty or has no
+        ``GPU hours`` column, an empty frame with only the columns ``Model``,
+        ``GPU hours`` and ``Performance`` is returned (no ``GPU multiplier``).
+    """
+    df = filter_base_df(
+        benchmark_name,
+        selected_species,
+        selected_assays,
+        selected_models,
+        selected_datasets,
+    )
+    # Nothing to aggregate without rows or without training-time data.
+    if df.empty or "GPU hours" not in df.columns:
+        return pd.DataFrame(columns=["Model", "GPU hours", "Performance"])
+    # Average both the score and the recorded hours over every row of a model.
+    out = (
+        df.groupby("Model", as_index=False)
+        .agg({"Score": "mean", "GPU hours": "mean"})
+        .rename(columns={"Score": "Performance"})
+    )
+    # Apply per-model multiplier (default 1)
+    out["GPU multiplier"] = out["Model"].map(MODEL_GPU_MULTIPLIER).fillna(1).astype(float)
+    out["GPU hours"] = out["GPU hours"] * out["GPU multiplier"]
+    # Drop models whose mean score or mean hours came out as NaN.
+    out = out.dropna(subset=["GPU hours", "Performance"])
+    # Rounding is applied to the returned values themselves, not only to their
+    # display; hours below 0.05 therefore become 0.0.
+    out["Performance"] = out["Performance"].round(3)
+    out["GPU hours"] = out["GPU hours"].round(1)
+    return out
 # ---------------------------------------------------------------------
 # UI helpers
     # Fallback for older Streamlit versions
     return st.sidebar.checkbox(label, value=value, key=key)
 # ---------------------------------------------------------------------
 # Streamlit UI
 # ---------------------------------------------------------------------
 def main():
     st.title("🧬 NTv3 Benchmark")
     st.markdown(_INTRO)
     if not available_species:
         st.sidebar.info(f"No {species_group.lower()} species available for this benchmark.")
     # Assay toggles (Pearson only), based on filtered species
     if cfg.get("has_assay_type", False):
         st.sidebar.subheader("Assay types")
     else:
         selected_assays = []
     # Bed track / dataset toggles (MCC only), based on species selection
     selected_datasets: List[str] = []
     if benchmark_name == "Genome Annotation":
     col1, col2 = st.columns([1, 1])
     with col1:
+        st.subheader("🏅 Leaderboard")
+        st.write("\n")  # spacer to match plotly padding
         st.write("\n")
         st.write("\n")
         if leaderboard_df.empty:
         else:
             st.dataframe(leaderboard_df, use_container_width=True)
     with col2:
         st.subheader("📈 Mean score per model")
         if bar_df.empty:
         else:
             # Order models by performance (least -> most)
             bar_df = bar_df.sort_values("Mean score", ascending=True)
             model_order = bar_df["Model"].tolist()
             fig = px.bar(
                 y="Mean score",
                 color="Model",
                 color_discrete_map=MODEL_COLORS,
+                category_orders={"Model": model_order},
             )
             fig.update_layout(
                 barmode="group",
+                height=480,
                 xaxis_title="",
+                yaxis_title=cfg["metric_label"],
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
                 bargap=0.08,
             )
             fig.update_xaxes(showticklabels=False)
             st.plotly_chart(fig, use_container_width=True)
     # --- Breakdown plot: assay_type (Functional Tracks) OR datasets (Genome Annotation) ---
     breakdown_df = build_category_model_df(
         benchmark_name, selected_species, selected_assays, selected_models, selected_datasets
     )
+    type_of_data = "assay type" if benchmark_name == "Functional Tracks" else "gene annotation"
+    st.subheader(f"🧪 Mean score by {type_of_data}")
     if breakdown_df.empty:
         st.info("No data for the selected filters.")
     else:
         )
         st.plotly_chart(fig_breakdown, use_container_width=True)
+    # ------------------------------------------------------------------
+    # Model comparison: Head-to-head (left) + Convergence (right)
+    # ------------------------------------------------------------------
     left, right = st.columns([1, 1], gap="large")
     with left:
+        st.markdown("#### ⚖️ Head-to-head (per track)")
         model_picker_options = selected_models if selected_models else _ALL_MODELS
         default_a = model_picker_options[0] if model_picker_options else _ALL_MODELS[0]
                 if c in scatter_df.columns:
                     hover_cols.append(c)
             fig_scatter = px.scatter(
                 scatter_df,
                 x="Model B",
                 hover_data=hover_cols,
             )
             fig_scatter.add_shape(
                 type="line",
                 x0=axis_range[0], y0=axis_range[0],
                 line=dict(color="red", dash="dot", width=2),
             )
             fig_scatter.update_layout(
                 height=550,
                 margin=dict(l=60, r=20, t=20, b=60),
                     title=f"{model_a} — {cfg['metric_label']}",
                     range=axis_range,
                     dtick=tick_step,
+                    scaleanchor="x",
                     scaleratio=1,
                     constrain="domain",
                 ),
             st.plotly_chart(fig_scatter, use_container_width=True)
     with right:
+        st.markdown("#### ⏱️ Time to convergence")
+        conv_df = build_convergence_df(
             benchmark_name,
             selected_species,
             selected_assays,
             selected_datasets,
         )
+        if conv_df.empty:
+            st.info("No training-time data found for the selected filters (missing 'GPU hours').")
         else:
+            fig_conv = px.scatter(
+                conv_df,
+                x="GPU hours",
+                y="Performance",
+                text="Model",
+                hover_data=["Model", "GPU hours", "Performance"],
             )
+            fig_conv.update_traces(textposition="top center")
+            fig_conv.update_layout(
+                height=630,  # match left panel height
+                xaxis=dict(
+                    title="GPU hours",
+                    type="log",          # 👈 log scale
+                ),
+                yaxis=dict(
+                    title=cfg["metric_label"],
+                ),
                 plot_bgcolor="rgba(0,0,0,0)",
                 paper_bgcolor="rgba(0,0,0,0)",
             )
+            fig_conv.update_xaxes(
+                type="log",
+                range=[0, np.log10(conv_df["GPU hours"].max())],  # log10(1) = 0
+                title="GPU hours (log scale)",
             )
+            st.plotly_chart(fig_conv, use_container_width=True)
+    # ------------------------------------------------------------------
+    # Violin (full width, below)
+    # ------------------------------------------------------------------
+    st.subheader("🎻 Performance comparison across tracks")
+    violin_df = build_violin_df(
+        benchmark_name,
+        selected_species,
+        selected_assays,
+        selected_models,
+        selected_datasets,
+    )
+    if violin_df.empty:
+        st.info("No data for the selected filters.")
+    else:
+        model_order = (
+            violin_df
+            .groupby("Model")["Score"]
+            .median()
+            .sort_values(ascending=True)
+            .index
+            .tolist()
+        )
+        fig_violin = px.violin(
+            violin_df,
+            x="Model",
+            y="Score",
+            color="Model",
+            color_discrete_map=MODEL_COLORS,
+            box=True,
+            points=False,
+            category_orders={"Model": model_order},
+        )
+        fig_violin.update_layout(
+            height=650,
+            xaxis_title="",
+            yaxis_title=cfg["metric_label"],
+            plot_bgcolor="rgba(0,0,0,0)",
+            paper_bgcolor="rgba(0,0,0,0)",
+            showlegend=False,
+        )
+        fig_violin.update_traces(
+            box_visible=True,
+            meanline_visible=False,
+        )
+        st.plotly_chart(fig_violin, use_container_width=True)
 if __name__ == "__main__":