MidAtBest committed on
Commit
988fb43
·
1 Parent(s): dea7853

feat: example streamlit app

Browse files
data/bed_dataset.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ species,datasets,MCC
2
+ Human,Intron,"[0.893,0.170,0.006,0.828,0.235,0.114,0.707,0.904,0.829,0.474]"
3
+ Human,Exon,"[0.294,0.007,0.428,0.341,0.757,0.442,0.512,0.860,0.884,0.873]"
4
+ Human,Splice_acceptor,"[0.057,0.279,0.129,0.844,0.272,0.174,0.082,0.603,0.277,0.448]"
5
+ Human,Start_codon,"[0.719,0.304,0.482,0.019,0.302,0.942,0.924,0.982,0.982,0.161]"
6
+ Cattle,Intron,"[0.853,0.969,0.585,0.609,0.127,0.842,0.814,0.147,0.472,0.258]"
7
+ Cattle,Exon,"[0.163,0.118,0.152,0.525,0.179,0.967,0.574,0.897,0.593,0.454]"
8
+ Cattle,Splice_acceptor,"[0.977,0.332,0.505,0.069,0.928,0.780,0.618,0.525,0.787,0.741]"
9
+ Cattle,Start_codon,"[0.499,0.858,0.155,0.246,0.494,0.853,0.439,0.853,0.882,0.295]"
10
+ Tomato,Intron,"[0.171,0.995,0.512,0.446,0.816,0.344,0.637,0.492,0.992,0.526]"
11
+ Tomato,Exon,"[0.751,0.186,0.778,0.341,0.853,0.711,0.161,0.559,0.204,0.153]"
12
+ Tomato,Splice_acceptor,"[0.073,0.688,0.568,0.669,0.910,0.581,0.168,0.687,0.928,0.821]"
13
+ Tomato,Start_codon,"[0.491,0.743,0.172,0.351,0.675,0.845,0.077,0.593,0.552,0.089]"
data/bigwig_dataset.csv ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ species,assay_type,datasets,pearson correlation
2
+ Human,ATAC-seq,ENCSR628PLS,"[0.500,0.673,0.992,0.631,0.890,0.280,0.948,0.018,0.162,0.788]"
3
+ Human,ATAC-seq,ENCSR487QSB,"[0.392,0.579,0.183,0.608,0.356,0.434,0.455,0.654,0.969,0.112]"
4
+ Human,ATAC-seq,ENCSR410DWV,"[0.319,0.624,0.419,0.602,0.592,0.114,0.380,0.635,0.805,0.988]"
5
+ Human,ATAC-seq,ENCSR325NFE,"[0.007,0.204,0.430,0.030,0.239,0.487,0.504,0.286,0.494,0.319]"
6
+ Human,ATAC-seq,ENCSR814RGG,"[0.286,0.247,0.734,0.201,0.020,0.989,0.409,0.159,0.911,1.000]"
7
+ Human,ChIP-seq,ENCSR863PSM,"[0.118,0.198,0.728,0.134,0.653,0.821,0.434,0.642,0.029,0.101]"
8
+ Human,ChIP-seq,ENCSR682BFG,"[0.035,0.880,0.423,0.537,0.104,0.156,0.358,0.540,0.167,0.245]"
9
+ Human,ChIP-seq,ENCSR754DRC,"[0.377,0.171,0.759,0.963,0.278,0.784,0.853,0.123,0.852,0.139]"
10
+ Human,ChIP-seq,ENCSR962OTG,"[0.015,0.616,0.259,0.921,0.468,0.926,0.312,0.214,0.183,0.586]"
11
+ Human,PRO-cap,ENCSR046BCI_M,"[0.268,0.200,0.376,0.365,0.546,0.915,0.755,0.346,0.840,0.842]"
12
+ Human,PRO-cap,ENCSR046BCI_P,"[0.866,0.553,0.464,0.809,0.784,0.137,0.547,0.015,0.155,0.885]"
13
+ Human,PRO-cap,ENCSR100LIJ_M,"[0.989,0.039,0.886,0.411,0.003,0.699,0.816,0.126,0.790,0.204]"
14
+ Human,PRO-cap,ENCSR100LIJ_P,"[0.024,0.270,0.623,0.091,0.040,0.722,0.629,0.364,0.852,0.539]"
15
+ Human,PRO-cap,ENCSR935RNW_M,"[0.300,0.520,0.939,0.683,0.229,0.368,0.501,0.901,0.368,0.351]"
16
+ Human,PRO-cap,ENCSR935RNW_P,"[0.164,0.431,0.981,0.954,0.796,0.993,0.081,0.099,0.133,0.479]"
17
+ Human,PRO-cap,ENCSR114HGS_M,"[0.833,0.079,0.685,0.750,0.107,0.223,0.229,0.887,0.058,0.477]"
18
+ Human,PRO-cap,ENCSR114HGS_P,"[0.986,0.601,0.523,0.976,0.167,0.107,0.475,0.729,0.513,0.656]"
19
+ Human,PRO-cap,ENCSR799DGV_M,"[0.696,0.732,0.735,0.070,0.158,0.806,0.628,0.595,0.743,0.666]"
20
+ Human,PRO-cap,ENCSR799DGV_P,"[0.242,0.036,0.942,0.224,0.797,0.958,0.120,0.132,0.162,0.313]"
21
+ Human,eCLIP,ENCSR154HRN_M,"[0.171,0.081,0.050,0.809,0.600,0.250,0.089,0.804,0.930,0.990]"
22
+ Human,eCLIP,ENCSR154HRN_P,"[0.572,0.883,0.764,0.465,0.257,0.531,0.389,0.078,0.414,0.882]"
23
+ Human,eCLIP,ENCSR249ROI_M,"[0.515,0.715,0.515,0.974,0.235,0.599,0.060,0.014,0.573,0.645]"
24
+ Human,eCLIP,ENCSR249ROI_P,"[0.513,0.539,0.588,0.266,0.888,0.126,0.770,0.048,0.911,0.076]"
25
+ Human,eCLIP,ENCSR321PWZ_M,"[0.593,0.376,0.753,0.990,0.908,0.128,0.994,0.231,0.326,0.488]"
26
+ Human,eCLIP,ENCSR321PWZ_P,"[0.396,0.834,0.536,0.662,0.523,0.260,0.863,0.144,0.682,0.428]"
27
+ Human,eCLIP,ENCSR484LTQ_M,"[0.500,0.297,0.116,0.644,0.593,0.359,0.532,0.290,0.512,0.860]"
28
+ Human,eCLIP,ENCSR484LTQ_P,"[0.066,0.726,0.662,0.471,0.062,0.253,0.785,0.242,0.179,0.792]"
29
+ Human,eCLIP,ENCSR862QCH_M,"[0.177,0.931,0.556,0.253,0.325,0.421,0.107,0.472,0.601,0.270]"
30
+ Human,eCLIP,ENCSR862QCH_P,"[0.567,0.657,0.240,0.844,0.392,0.685,0.376,0.586,0.009,0.874]"
31
+ Human,RNA-seq,ENCSR527JGN_M,"[0.290,0.982,0.241,0.573,0.275,0.819,0.182,0.872,0.358,0.225]"
32
+ Human,RNA-seq,ENCSR527JGN_P,"[0.390,0.131,0.325,0.252,0.376,0.154,0.383,0.796,0.908,0.018]"
33
+ Human,RNA-seq,ENCSR701YIC,"[0.819,0.494,0.866,0.884,0.795,0.625,0.430,0.194,0.942,0.054]"
34
+ Human,RNA-seq,ENCSR619DQO_M,"[0.416,0.079,0.886,0.028,0.310,0.883,0.393,0.946,0.261,0.709]"
35
+ Human,RNA-seq,ENCSR619DQO_P,"[0.712,0.221,0.052,0.699,0.099,0.213,0.167,0.355,0.209,0.401]"
36
+ Chicken,ATAC-seq,ERX9662174,"[0.970,0.682,0.553,0.337,0.432,0.824,0.736,0.245,0.151,0.220]"
37
+ Chicken,ATAC-seq,ERX9662177,"[0.621,0.160,0.453,0.308,0.119,0.981,0.506,0.906,0.307,0.602]"
38
+ Chicken,ATAC-seq,ERX9662178,"[0.544,0.381,0.943,0.517,0.130,0.982,0.330,0.374,0.081,0.249]"
39
+ Chicken,ATAC-seq,ERX9662183,"[0.705,0.672,0.926,0.499,0.985,0.486,0.840,0.058,0.138,0.402]"
40
+ Chicken,ATAC-seq,ERX9662185,"[0.106,0.599,0.743,0.927,0.004,0.540,0.234,0.200,0.490,0.455]"
41
+ Chicken,ATAC-seq,ERX9662186,"[0.367,0.607,0.281,0.163,0.408,0.589,0.855,0.815,0.799,0.590]"
42
+ Chicken,ATAC-seq,ERX9662180,"[0.576,0.006,0.981,0.060,0.578,0.195,0.165,0.634,0.789,0.233]"
43
+ Chicken,RNA-seq,ERX9662188,"[0.236,0.404,0.687,0.930,0.053,0.112,0.635,0.056,0.131,0.109]"
44
+ Chicken,RNA-seq,ERX9662190,"[0.191,0.503,0.917,0.304,0.014,0.583,0.413,0.580,0.219,0.522]"
45
+ Chicken,RNA-seq,ERX9662192,"[0.026,0.091,0.601,0.767,0.560,0.808,0.243,0.179,0.925,0.379]"
46
+ Chicken,RNA-seq,ERX9662194,"[0.116,0.590,0.266,0.674,0.765,0.495,0.089,0.043,0.513,0.244]"
47
+ Chicken,RNA-seq,ERX9662196,"[0.354,0.857,0.923,0.947,0.704,0.888,0.610,0.376,0.282,0.741]"
48
+ Chicken,RNA-seq,ERX9662198,"[0.122,0.997,0.500,0.750,0.170,0.344,0.260,0.195,0.711,0.406]"
49
+ Chicken,RNA-seq,ERX9662200,"[0.884,0.124,0.168,0.388,0.583,0.663,0.496,0.660,0.048,0.844]"
50
+ Rice,Ribo-seq,SRR13808067,"[0.281,0.802,0.011,0.794,0.410,0.959,0.597,0.247,0.036,0.055]"
51
+ Rice,Ribo-seq,SRR13808068,"[0.821,0.082,0.283,0.190,0.673,0.660,0.910,0.589,0.239,0.468]"
52
+ Rice,Ribo-seq,SRR13808069,"[0.874,0.258,0.766,0.217,0.350,0.426,0.776,0.144,0.929,0.210]"
53
+ Rice,Ribo-seq,SRR13808070,"[0.016,0.417,0.724,0.646,0.571,0.076,0.525,0.522,0.983,0.911]"
54
+ Rice,Ribo-seq,SRR13808072,"[0.146,0.401,0.324,0.814,0.479,0.942,0.909,0.960,0.495,0.467]"
55
+ Rice,Ribo-seq,SRR13808073,"[0.040,0.637,0.977,0.892,0.774,0.418,0.061,0.907,0.384,0.551]"
56
+ Rice,Ribo-seq,SRR13808074,"[0.565,0.197,0.363,0.493,0.160,0.332,0.581,0.834,0.850,0.852]"
57
+ Rice,Ribo-seq,SRR13808075,"[0.595,0.306,0.426,0.823,0.851,0.244,0.450,0.012,0.100,0.404]"
58
+ Rice,Ribo-seq,SRR13808076,"[0.704,0.805,0.244,0.887,0.700,0.524,0.576,0.781,0.633,0.821]"
59
+ Rice,Ribo-seq,SRR13808077,"[0.052,0.298,0.045,0.520,0.834,0.372,0.287,0.923,0.193,0.033]"
60
+ Rice,Ribo-seq,SRR13808078,"[0.889,0.934,0.022,0.732,0.651,0.042,0.601,0.366,0.271,0.982]"
61
+ Rice,Ribo-seq,SRR13808064,"[0.678,0.267,0.069,0.205,0.194,0.302,0.493,0.217,0.418,0.570]"
62
+ Rice,Ribo-seq,SRR13808065,"[0.489,0.130,0.499,0.610,0.064,0.004,0.700,0.189,0.424,0.547]"
63
+ Rice,Ribo-seq,SRR13808066,"[0.511,0.092,0.886,0.340,0.030,0.073,0.355,0.021,0.839,0.384]"
64
+ Maize,Ribo-seq,SRR13808051,"[0.620,0.872,0.972,0.392,0.498,0.671,0.303,0.066,0.506,0.327]"
65
+ Maize,Ribo-seq,SRR13808052,"[0.268,0.581,0.455,0.906,0.656,0.776,0.892,0.872,0.232,0.378]"
66
+ Maize,Ribo-seq,SRR13808056,"[0.786,0.499,0.807,0.657,0.853,0.539,0.276,0.593,0.362,0.359]"
67
+ Maize,Ribo-seq,SRR13808058,"[0.293,0.987,0.781,0.717,0.044,0.145,0.617,0.430,0.358,0.573]"
68
+ Maize,Ribo-seq,SRR13808059,"[0.078,0.279,0.384,0.989,0.802,0.375,0.396,0.461,0.066,0.484]"
69
+ Maize,Ribo-seq,SRR13808061,"[0.016,0.372,0.857,0.610,0.178,0.133,0.579,0.200,0.059,0.173]"
70
+ Maize,Ribo-seq,SRR13808062,"[0.953,0.791,0.948,0.761,0.733,0.992,0.239,0.362,0.976,0.112]"
71
+ Maize,Ribo-seq,SRR13808063,"[0.717,0.289,0.606,0.648,0.963,0.046,0.597,0.485,0.601,0.927]"
72
+ Tomato,ATAC-seq,SRX27799703,"[0.158,0.581,0.186,0.230,0.791,0.101,0.014,0.630,0.712,0.165]"
73
+ Tomato,ATAC-seq,SRX27799731,"[0.614,0.980,0.431,0.648,0.283,0.303,0.967,0.275,0.278,0.336]"
74
+ Tomato,ATAC-seq,SRX27799719,"[0.930,0.199,0.335,0.824,0.410,0.866,0.886,0.164,0.935,0.720]"
75
+ Tomato,ATAC-seq,SRX27799727,"[0.642,0.539,0.221,0.035,0.048,0.136,0.687,0.922,0.974,0.821]"
76
+ Tomato,ATAC-seq,SRX27799722,"[0.305,0.463,0.639,0.774,0.851,0.812,0.476,0.443,0.239,0.197]"
77
+ Tomato,ATAC-seq,SRX27799718,"[0.561,0.584,0.014,0.146,0.447,0.366,0.707,0.871,0.469,0.324]"
78
+ Tomato,ATAC-seq,SRX27799733,"[0.260,0.257,0.157,0.078,0.358,0.448,0.395,0.780,0.622,0.748]"
79
+ Tomato,ATAC-seq,SRX29291448,"[0.444,0.970,0.856,0.004,0.539,0.036,0.317,0.532,0.969,0.972]"
80
+ Tomato,ATAC-seq,SRX29291447,"[0.781,0.433,0.134,0.094,0.658,0.878,0.315,0.770,0.052,0.817]"
81
+ Tomato,ATAC-seq,SRX29291446,"[0.759,0.966,0.641,0.731,0.719,0.880,0.277,0.054,0.277,0.294]"
82
+ Tomato,ATAC-seq,SRX29291445,"[0.625,0.970,0.044,0.236,0.403,0.389,0.847,0.558,0.922,0.819]"
83
+ Tomato,ATAC-seq,SRX29291444,"[0.854,0.242,0.517,0.964,0.880,0.467,0.727,0.467,0.628,0.161]"
84
+ Tomato,ATAC-seq,SRX29291443,"[0.437,0.949,0.847,0.303,0.246,0.285,0.937,0.604,0.802,0.806]"
85
+ Tomato,ATAC-seq,SRX29291442,"[0.579,0.635,0.763,0.634,0.287,0.548,0.914,0.167,0.181,0.703]"
86
+ Tomato,ATAC-seq,SRX29291441,"[0.622,0.530,0.081,0.768,0.473,0.617,0.944,0.122,0.979,0.502]"
87
+ Tomato,ATAC-seq,SRX29291440,"[0.985,0.297,0.126,0.540,0.415,0.520,0.139,0.687,0.189,0.274]"
88
+ Tomato,ATAC-seq,SRX29291439,"[0.138,0.458,0.886,0.864,0.646,0.653,0.311,0.651,0.790,0.886]"
89
+ Tomato,ATAC-seq,SRX29291438,"[0.105,0.258,0.901,0.694,0.369,0.208,0.040,0.538,0.711,0.464]"
90
+ Tomato,ATAC-seq,SRX29291431,"[0.240,0.380,0.422,0.485,0.491,0.700,0.192,0.989,0.527,0.435]"
91
+ Tomato,ATAC-seq,SRX29291430,"[0.302,0.618,0.073,0.107,0.287,0.651,0.585,0.092,0.193,0.848]"
92
+ Arabidopsis,,SRX18063718,"[0.967,0.150,0.374,0.382,0.905,0.827,0.972,0.121,0.326,0.431]"
93
+ Arabidopsis,,SRX9629757,"[0.796,0.147,0.719,0.989,0.616,0.399,0.823,0.056,0.258,0.796]"
94
+ Arabidopsis,,SRX14636807,"[0.040,0.357,0.850,0.730,0.254,0.593,0.704,0.170,0.598,0.640]"
95
+ Arabidopsis,,SRX21812610,"[0.447,0.071,0.487,0.403,0.242,0.104,0.565,0.436,0.458,0.913]"
96
+ Arabidopsis,,SRX20209030,"[0.949,0.649,0.518,0.671,0.232,0.907,0.843,0.929,0.314,0.956]"
97
+ Arabidopsis,,SRX27406559,"[0.278,0.701,0.666,0.573,0.871,0.294,0.713,0.590,0.953,0.712]"
98
+ Arabidopsis,,SRX7819350,"[0.681,0.850,0.103,0.974,0.466,0.860,0.310,0.960,0.004,0.393]"
99
+ Arabidopsis,,SRX26731321,"[0.447,0.114,0.530,0.061,0.321,0.055,0.698,0.249,0.793,0.989]"
100
+ Arabidopsis,,SRX13076012,"[0.853,0.824,0.850,0.426,0.984,0.483,0.071,0.824,0.714,0.091]"
101
+ Arabidopsis,,SRX11037822,"[0.877,0.707,0.165,0.350,0.395,0.784,0.729,0.480,0.616,0.227]"
102
+ Arabidopsis,,SRX15435939,"[0.372,0.857,0.601,0.899,0.333,0.671,0.459,0.611,0.519,0.940]"
103
+ Arabidopsis,,ERX5787106,"[0.137,0.246,0.789,0.451,0.626,0.413,0.608,0.745,0.233,0.975]"
104
+ Arabidopsis,,SRX26096490,"[0.420,0.887,0.408,0.749,0.888,0.150,0.660,0.795,0.282,0.280]"
105
+ Arabidopsis,,SRX12144795,"[0.463,0.706,0.722,0.959,0.595,0.019,0.843,0.444,0.373,0.925]"
106
+ Arabidopsis,,SRX27406577,"[0.548,0.636,0.692,0.437,0.646,0.911,0.879,0.234,0.663,0.982]"
107
+ Arabidopsis,,SRX23584554,"[0.745,0.674,0.711,0.209,0.502,0.331,0.289,0.501,0.184,0.038]"
108
+ Arabidopsis,,SRX9514922,"[0.224,0.486,0.254,0.766,0.440,0.329,0.257,0.503,0.323,0.911]"
109
+ Arabidopsis,,ERX11438964,"[0.263,0.561,0.542,0.038,0.487,0.028,0.341,0.358,0.447,0.510]"
110
+ Arabidopsis,,SRX19866208,"[0.133,0.023,0.062,0.035,0.245,0.281,0.117,0.454,0.285,0.345]"
111
+ Arabidopsis,,DRX593284,"[0.741,0.308,0.507,0.118,0.818,0.823,0.517,0.224,0.480,0.971]"
112
+ Arabidopsis,,SRX10135081,"[0.234,0.015,0.219,0.243,0.567,0.446,0.702,0.127,0.244,0.568]"
113
+ Arabidopsis,,SRX3744342,"[0.445,0.018,0.594,0.522,0.599,0.978,0.891,0.427,0.202,0.199]"
114
+ Arabidopsis,,SRX9674213,"[0.294,0.493,0.407,0.817,0.341,0.948,0.814,0.010,0.985,0.796]"
115
+ Arabidopsis,,SRX19866210,"[0.875,0.338,0.644,0.596,0.855,0.824,0.271,0.106,0.695,0.334]"
116
+ Arabidopsis,,SRX6685858,"[0.629,0.081,0.456,0.235,0.527,0.210,0.546,0.437,0.842,0.898]"
117
+ Arabidopsis,,SRX3529727,"[0.101,0.966,0.940,0.374,0.043,0.537,0.771,0.753,0.082,0.295]"
118
+ Arabidopsis,,SRX18888026,"[0.451,0.104,0.831,0.333,0.525,0.173,0.262,0.199,0.977,0.222]"
119
+ Arabidopsis,,SRX689004,"[0.732,0.967,0.289,0.289,0.979,0.288,0.969,0.320,0.785,0.183]"
120
+ Arabidopsis,,SRX18888040,"[0.536,0.036,0.523,0.152,0.950,0.991,0.616,0.724,0.650,0.235]"
121
+ Arabidopsis,,SRX26650235,"[0.450,0.694,0.877,0.342,0.091,0.545,0.969,0.335,0.951,0.614]"
requirements.txt CHANGED
@@ -1,3 +1,17 @@
1
- altair
 
 
 
 
 
 
 
 
 
2
  pandas
3
- streamlit
 
 
 
 
 
 
1
+ APScheduler
2
+ black
3
+ datasets
4
+ gradio
5
+ gradio[oauth]
6
+ gradio_leaderboard==0.0.9
7
+ gradio_client
8
+ huggingface-hub>=0.18.0
9
+ matplotlib
10
+ numpy
11
  pandas
12
+ python-dateutil
13
+ tqdm
14
+ transformers
15
+ tokenizers>=0.15.0
16
+ sentencepiece
17
+ plotly
src/streamlit_app.py CHANGED
@@ -1,40 +1,336 @@
1
- import altair as alt
2
- import numpy as np
 
 
3
  import pandas as pd
4
  import streamlit as st
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
14
  """
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import ast
3
+ import os
4
+
5
  import pandas as pd
6
  import streamlit as st
7
+ import plotly.express as px
8
 
9
+ # ---------------------------------------------------------------------
10
+ # Configuration
11
+ # ---------------------------------------------------------------------
12
+
13
# Single table pairing each model's display name with its bar colour.
# The order of the entries is the model order used throughout the app.
_MODEL_PALETTE = (
    ("NTv2 500M MS", "#1f77b4"),
    ("BPNet 6M", "#ff7f0e"),
    ("SpliceAI 44M", "#2ca02c"),
    ("PlantCAD2 - Small 88M", "#d62728"),
    ("Evo2 1b BF16", "#9467bd"),
    ("NTv3 8M", "#8c564b"),
    ("NTv3 100M", "#e377c2"),
    ("NTv3 650M", "#7f7f7f"),
    ("NTv3 650M - post-trained", "#bcbd22"),
)

# Model display names, in table order.
MODEL_NAMES = [model_name for model_name, _ in _MODEL_PALETTE]

# Model display name -> hex colour.
MODEL_COLORS = dict(_MODEL_PALETTE)
36
 
37
# Date shown in the "Last updated" line under the page title.
_LAST_UPDATED = "Dec 10, 2025"

# Markdown introduction rendered at the top of the page. The empty first and
# last entries reproduce the leading and trailing newline of the text.
_INTRO = "\n".join(
    [
        "",
        "Simple leaderboard over custom benchmarks.",
        "",
        "- **Pearson correlations (multi-assay)**: per-dataset scores across species and models.",
        "- **MCC (bed tracks)**: per-track MCC values across species and models.",
        "",
        "Each metric cell in the CSVs is a list of scores (one per model).",
        "We expand this to (Model × Species × Dataset) and aggregate according to your filters.",
        "",
    ]
)
47
 
48
# Directory holding the benchmark CSVs.
# "./data" (relative to the current working directory) stays the first
# choice. When that directory does not exist - for example because the app
# was started from another working directory - fall back to the data/ folder
# located beside the directory that contains this script (the commit places
# the CSVs in data/ and the app in src/).
DATA_DIR = "./data"
if not os.path.isdir(DATA_DIR):
    DATA_DIR = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "data",
    )

# Per-dataset Pearson correlations (one score list per row).
PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
# Per-track MCC values (one score list per row).
MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
51
+
52
+
53
+ # ---------------------------------------------------------------------
54
+ # Data loading & preprocessing
55
+ # ---------------------------------------------------------------------
56
+
57
+
58
@st.cache_data
def load_raw_data():
    """Read both benchmark CSVs and return them as ``(pearson_df, mcc_df)``.

    Column names are stripped of surrounding whitespace; cell values are
    returned as ``pandas.read_csv`` parsed them.
    """
    frames = []
    for csv_path in (PEARSON_PATH, MCC_PATH):
        frame = pd.read_csv(csv_path)
        frame.columns = [name.strip() for name in frame.columns]
        frames.append(frame)

    pearson_df, mcc_df = frames
    return pearson_df, mcc_df
66
+
67
+
68
def expand_metric_lists(df: pd.DataFrame, metric_col: str) -> pd.DataFrame:
    """Expand a column of stringified score lists into one row per model.

    Every cell of ``metric_col`` is parsed as a Python list/tuple literal and
    its i-th element is attributed to ``MODEL_NAMES[i]``.

    Args:
        df: Frame with ``species`` and ``datasets`` columns, the
            ``metric_col`` column and, optionally, ``assay_type``.
        metric_col: Name of the column holding the stringified lists.

    Returns:
        A frame with the columns ``species``, ``datasets``, ``Model`` and
        ``Score`` (plus ``assay_type`` when ``df`` has that column). The
        columns exist even when no row could be expanded, so callers can
        index them unconditionally.

    Rows whose cell does not parse to a list/tuple are skipped, as are
    individual entries that cannot be converted to ``float``.
    """
    has_assay_type = "assay_type" in df.columns
    out_columns = ["species", "datasets", "Model", "Score"]
    if has_assay_type:
        out_columns.append("assay_type")

    rows = []
    for _, row in df.iterrows():
        try:
            values = ast.literal_eval(str(row[metric_col]))
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Best effort: a cell that is not a valid literal (e.g. an empty
            # cell read as NaN) contributes no rows.
            continue

        if not isinstance(values, (list, tuple)):
            continue

        # zip() stops at the shorter sequence, so surplus scores (or surplus
        # model names) are ignored.
        # NOTE(review): the bundled CSV rows carry 10 scores while
        # MODEL_NAMES lists 9 names, so the last score of every row is
        # currently dropped - confirm which side is wrong.
        for model_name, value in zip(MODEL_NAMES, values):
            try:
                score = float(value)
            except (TypeError, ValueError):
                # A non-numeric entry only invalidates itself, not the row.
                continue

            new_row = {
                "species": row["species"],
                "datasets": row["datasets"],
                "Model": model_name,
                "Score": score,
            }
            if has_assay_type:
                new_row["assay_type"] = row["assay_type"]
            rows.append(new_row)

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=out_columns)
98
+
99
+
100
@st.cache_data
def load_expanded_data():
    """Return the per-model expanded frames as ``(pearson, mcc)``."""
    pearson_raw, mcc_raw = load_raw_data()
    return (
        expand_metric_lists(pearson_raw, "pearson correlation"),
        expand_metric_lists(mcc_raw, "MCC"),
    )
106
+
107
+
108
# Expanded frames (one row per model and dataset) for both benchmarks.
_PEARSON_DF, _MCC_DF = load_expanded_data()

# Global option lists; the UI narrows them further per benchmark.
# Sorted species names occurring in either benchmark.
_ALL_SPECIES = sorted(
    {*_PEARSON_DF["species"].unique(), *_MCC_DF["species"].unique()}
)

# Sorted non-missing assay types of the Pearson benchmark (empty when the
# frame has no assay_type column).
if "assay_type" in _PEARSON_DF.columns:
    _ALL_ASSAYS = sorted(_PEARSON_DF["assay_type"].dropna().unique())
else:
    _ALL_ASSAYS = []

# Independent copy of the configured model names.
_ALL_MODELS = list(MODEL_NAMES)
122
+
123
# Benchmark display name -> its expanded frame, the label of its metric and
# whether its rows carry an assay_type column. The insertion order is the
# order of the benchmark selector.
_BENCHMARKS = {
    "Pearson correlations (multi-assay)": dict(
        df=_PEARSON_DF,
        metric_label="Pearson correlation",
        has_assay_type=True,
    ),
    "MCC (bed tracks)": dict(
        df=_MCC_DF,
        metric_label="MCC",
        has_assay_type=False,
    ),
}
135
+
136
+
137
+ # ---------------------------------------------------------------------
138
+ # Computation helpers
139
+ # ---------------------------------------------------------------------
140
+
141
+
142
def filter_base_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Return the rows of one benchmark that match the sidebar selection.

    Args:
        benchmark_name: Key into ``_BENCHMARKS``.
        selected_species: Species to keep.
        selected_assays: Assay types to keep; only applied when the
            benchmark is flagged ``has_assay_type`` and has that column.
        selected_models: Models to keep.
        selected_datasets: Datasets / bed tracks to keep.

    Returns:
        A filtered copy of the benchmark frame. An empty selection list
        means "do not filter on this dimension".

    Raises:
        KeyError: If ``benchmark_name`` is not a known benchmark.
    """
    cfg = _BENCHMARKS[benchmark_name]
    df = cfg["df"].copy()

    # Species filter
    if selected_species:
        df = df[df["species"].isin(selected_species)]

    # Assay type filter (Pearson only)
    if cfg.get("has_assay_type", False) and selected_assays and "assay_type" in df.columns:
        assay = df["assay_type"]
        # Rows without an assay type (the Arabidopsis rows of the bundled
        # CSV) have no toggle of their own, so a plain isin() would drop them
        # whenever any assay is selected. Keep them: they cannot be filtered
        # on a value they do not have.
        df = df[assay.isin(selected_assays) | assay.isna()]

    # Dataset / bed track filter (for MCC, but safe to apply generally)
    if selected_datasets and "datasets" in df.columns:
        df = df[df["datasets"].isin(selected_datasets)]

    # Model filter
    if selected_models:
        df = df[df["Model"].isin(selected_models)]

    return df
169
+
170
+
171
def build_leaderboard(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Aggregate the filtered scores into one leaderboard row per model.

    Args:
        benchmark_name: Key into ``_BENCHMARKS``.
        selected_species: Species filter, forwarded to ``filter_base_df``.
        selected_assays: Assay filter, forwarded to ``filter_base_df``.
        selected_models: Model filter, forwarded to ``filter_base_df``.
        selected_datasets: Dataset filter, forwarded to ``filter_base_df``.

    Returns:
        A frame with the columns ``Model``, ``Num entries`` (number of
        non-missing scores) and ``Mean score`` (rounded to 3 decimals),
        sorted by mean score in descending order. The frame is empty, with
        the same columns, when no row matches the filters.
    """
    columns = ["Model", "Num entries", "Mean score"]

    df = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )
    if df.empty:
        return pd.DataFrame(columns=columns)

    # One pass over the groups yields both the mean and the entry count.
    agg = (
        df.groupby("Model")["Score"]
        .agg(["mean", "count"])
        .rename(columns={"mean": "Mean score", "count": "Num entries"})
        .reset_index()
    )

    # Rank on the exact means and round afterwards, so that models differing
    # only beyond the third decimal are still ordered correctly. The stable
    # sort keeps true ties in groupby's (alphabetical) model order.
    agg = agg.sort_values(
        "Mean score", ascending=False, kind="mergesort"
    ).reset_index(drop=True)
    agg["Mean score"] = agg["Mean score"].round(3)

    return agg[columns]
204
+
205
+
206
def build_bar_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Return the data behind the bar chart.

    Currently this is the leaderboard table itself, i.e. one bar per model.
    """
    return build_leaderboard(
        benchmark_name=benchmark_name,
        selected_species=selected_species,
        selected_assays=selected_assays,
        selected_models=selected_models,
        selected_datasets=selected_datasets,
    )
217
+
218
+
219
+ # ---------------------------------------------------------------------
220
+ # Streamlit UI
221
+ # ---------------------------------------------------------------------
222
+
223
+
224
def main():
    """Render the page: sidebar filters, leaderboard table and bar chart."""
    st.set_page_config(page_title="Custom Model Benchmarks", layout="wide")

    st.title("🧬 Custom Model Benchmarks")
    st.markdown(_INTRO)
    st.markdown(f"_Last updated: **{_LAST_UPDATED}**_")

    # --- Sidebar filters ---
    sidebar = st.sidebar
    sidebar.header("Filters")

    # Benchmark selector
    benchmark_name = sidebar.selectbox(
        "Benchmark",
        options=list(_BENCHMARKS),
        index=0,
    )
    cfg = _BENCHMARKS[benchmark_name]
    df_bench = cfg["df"]

    # One toggle per species that occurs in the chosen benchmark.
    sidebar.subheader("Species")
    selected_species: List[str] = [
        species
        for species in sorted(df_bench["species"].unique())
        if sidebar.toggle(
            species, value=True, key=f"species_{benchmark_name}_{species}"
        )
    ]

    # Rows of the benchmark restricted to the chosen species; the whole
    # benchmark when no species is switched on.
    def rows_for_selected_species():
        if not selected_species:
            return df_bench
        return df_bench[df_bench["species"].isin(selected_species)]

    # Assay toggles, only for benchmarks flagged as having assay types.
    selected_assays: List[str] = []
    if cfg.get("has_assay_type", False):
        sidebar.subheader("Assay types (Pearson only)")
        assay_rows = rows_for_selected_species()
        assay_options = (
            sorted(assay_rows["assay_type"].dropna().unique())
            if "assay_type" in assay_rows.columns
            else []
        )
        selected_assays = [
            assay
            for assay in assay_options
            if sidebar.toggle(
                assay, value=True, key=f"assay_{benchmark_name}_{assay}"
            )
        ]

    # Bed track / dataset toggles, only for the MCC benchmark.
    selected_datasets: List[str] = []
    if benchmark_name == "MCC (bed tracks)":
        sidebar.subheader("Bed tracks (datasets)")
        track_rows = rows_for_selected_species()
        selected_datasets = [
            track
            for track in sorted(track_rows["datasets"].unique())
            if sidebar.toggle(
                track, value=True, key=f"dataset_{benchmark_name}_{track}"
            )
        ]

    # Model toggles: every configured model is offered for every benchmark.
    sidebar.subheader("Models")
    selected_models: List[str] = [
        model
        for model in _ALL_MODELS
        if sidebar.toggle(model, value=True, key=f"model_{model}")
    ]

    # --- Main content ---
    filter_args = (
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )
    leaderboard_df = build_leaderboard(*filter_args)
    bar_df = build_bar_df(*filter_args)

    table_col, chart_col = st.columns([1, 1])

    with table_col:
        st.subheader("🏅 Leaderboard (per model)")
        if not leaderboard_df.empty:
            st.dataframe(leaderboard_df, use_container_width=True)
        else:
            st.info("No data for the selected filters.")

    with chart_col:
        st.subheader("📈 Mean score per model")
        if not bar_df.empty:
            fig = px.bar(
                bar_df,
                x="Model",
                y="Mean score",
                color="Model",
                color_discrete_map=MODEL_COLORS,
            )
            fig.update_layout(
                barmode="group",
                height=500,
                xaxis_title="Model",
                yaxis_title="Mean score",
                plot_bgcolor="rgba(0,0,0,0)",
                paper_bgcolor="rgba(0,0,0,0)",
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No data for the selected filters.")


# Build the page whenever this script is executed.
main()