Spaces:
Running
Running
feat: example streamlit app
Browse files- data/bed_dataset.csv +13 -0
- data/bigwig_dataset.csv +121 -0
- requirements.txt +16 -2
- src/streamlit_app.py +329 -33
data/bed_dataset.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
species,datasets,MCC
|
| 2 |
+
Human,Intron,"[0.893,0.170,0.006,0.828,0.235,0.114,0.707,0.904,0.829,0.474]"
|
| 3 |
+
Human,Exon,"[0.294,0.007,0.428,0.341,0.757,0.442,0.512,0.860,0.884,0.873]"
|
| 4 |
+
Human,Splice_acceptor,"[0.057,0.279,0.129,0.844,0.272,0.174,0.082,0.603,0.277,0.448]"
|
| 5 |
+
Human,Start_codon,"[0.719,0.304,0.482,0.019,0.302,0.942,0.924,0.982,0.982,0.161]"
|
| 6 |
+
Cattle,Intron,"[0.853,0.969,0.585,0.609,0.127,0.842,0.814,0.147,0.472,0.258]"
|
| 7 |
+
Cattle,Exon,"[0.163,0.118,0.152,0.525,0.179,0.967,0.574,0.897,0.593,0.454]"
|
| 8 |
+
Cattle,Splice_acceptor,"[0.977,0.332,0.505,0.069,0.928,0.780,0.618,0.525,0.787,0.741]"
|
| 9 |
+
Cattle,Start_codon,"[0.499,0.858,0.155,0.246,0.494,0.853,0.439,0.853,0.882,0.295]"
|
| 10 |
+
Tomato,Intron,"[0.171,0.995,0.512,0.446,0.816,0.344,0.637,0.492,0.992,0.526]"
|
| 11 |
+
Tomato,Exon,"[0.751,0.186,0.778,0.341,0.853,0.711,0.161,0.559,0.204,0.153]"
|
| 12 |
+
Tomato,Splice_acceptor,"[0.073,0.688,0.568,0.669,0.910,0.581,0.168,0.687,0.928,0.821]"
|
| 13 |
+
Tomato,Start_codon,"[0.491,0.743,0.172,0.351,0.675,0.845,0.077,0.593,0.552,0.089]"
|
data/bigwig_dataset.csv
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
species,assay_type,datasets,pearson correlation
|
| 2 |
+
Human,ATAC-seq,ENCSR628PLS,"[0.500,0.673,0.992,0.631,0.890,0.280,0.948,0.018,0.162,0.788]"
|
| 3 |
+
Human,ATAC-seq,ENCSR487QSB,"[0.392,0.579,0.183,0.608,0.356,0.434,0.455,0.654,0.969,0.112]"
|
| 4 |
+
Human,ATAC-seq,ENCSR410DWV,"[0.319,0.624,0.419,0.602,0.592,0.114,0.380,0.635,0.805,0.988]"
|
| 5 |
+
Human,ATAC-seq,ENCSR325NFE,"[0.007,0.204,0.430,0.030,0.239,0.487,0.504,0.286,0.494,0.319]"
|
| 6 |
+
Human,ATAC-seq,ENCSR814RGG,"[0.286,0.247,0.734,0.201,0.020,0.989,0.409,0.159,0.911,1.000]"
|
| 7 |
+
Human,ChIP-seq,ENCSR863PSM,"[0.118,0.198,0.728,0.134,0.653,0.821,0.434,0.642,0.029,0.101]"
|
| 8 |
+
Human,ChIP-seq,ENCSR682BFG,"[0.035,0.880,0.423,0.537,0.104,0.156,0.358,0.540,0.167,0.245]"
|
| 9 |
+
Human,ChIP-seq,ENCSR754DRC,"[0.377,0.171,0.759,0.963,0.278,0.784,0.853,0.123,0.852,0.139]"
|
| 10 |
+
Human,ChIP-seq,ENCSR962OTG,"[0.015,0.616,0.259,0.921,0.468,0.926,0.312,0.214,0.183,0.586]"
|
| 11 |
+
Human,PRO-cap,ENCSR046BCI_M,"[0.268,0.200,0.376,0.365,0.546,0.915,0.755,0.346,0.840,0.842]"
|
| 12 |
+
Human,PRO-cap,ENCSR046BCI_P,"[0.866,0.553,0.464,0.809,0.784,0.137,0.547,0.015,0.155,0.885]"
|
| 13 |
+
Human,PRO-cap,ENCSR100LIJ_M,"[0.989,0.039,0.886,0.411,0.003,0.699,0.816,0.126,0.790,0.204]"
|
| 14 |
+
Human,PRO-cap,ENCSR100LIJ_P,"[0.024,0.270,0.623,0.091,0.040,0.722,0.629,0.364,0.852,0.539]"
|
| 15 |
+
Human,PRO-cap,ENCSR935RNW_M,"[0.300,0.520,0.939,0.683,0.229,0.368,0.501,0.901,0.368,0.351]"
|
| 16 |
+
Human,PRO-cap,ENCSR935RNW_P,"[0.164,0.431,0.981,0.954,0.796,0.993,0.081,0.099,0.133,0.479]"
|
| 17 |
+
Human,PRO-cap,ENCSR114HGS_M,"[0.833,0.079,0.685,0.750,0.107,0.223,0.229,0.887,0.058,0.477]"
|
| 18 |
+
Human,PRO-cap,ENCSR114HGS_P,"[0.986,0.601,0.523,0.976,0.167,0.107,0.475,0.729,0.513,0.656]"
|
| 19 |
+
Human,PRO-cap,ENCSR799DGV_M,"[0.696,0.732,0.735,0.070,0.158,0.806,0.628,0.595,0.743,0.666]"
|
| 20 |
+
Human,PRO-cap,ENCSR799DGV_P,"[0.242,0.036,0.942,0.224,0.797,0.958,0.120,0.132,0.162,0.313]"
|
| 21 |
+
Human,eCLIP,ENCSR154HRN_M,"[0.171,0.081,0.050,0.809,0.600,0.250,0.089,0.804,0.930,0.990]"
|
| 22 |
+
Human,eCLIP,ENCSR154HRN_P,"[0.572,0.883,0.764,0.465,0.257,0.531,0.389,0.078,0.414,0.882]"
|
| 23 |
+
Human,eCLIP,ENCSR249ROI_M,"[0.515,0.715,0.515,0.974,0.235,0.599,0.060,0.014,0.573,0.645]"
|
| 24 |
+
Human,eCLIP,ENCSR249ROI_P,"[0.513,0.539,0.588,0.266,0.888,0.126,0.770,0.048,0.911,0.076]"
|
| 25 |
+
Human,eCLIP,ENCSR321PWZ_M,"[0.593,0.376,0.753,0.990,0.908,0.128,0.994,0.231,0.326,0.488]"
|
| 26 |
+
Human,eCLIP,ENCSR321PWZ_P,"[0.396,0.834,0.536,0.662,0.523,0.260,0.863,0.144,0.682,0.428]"
|
| 27 |
+
Human,eCLIP,ENCSR484LTQ_M,"[0.500,0.297,0.116,0.644,0.593,0.359,0.532,0.290,0.512,0.860]"
|
| 28 |
+
Human,eCLIP,ENCSR484LTQ_P,"[0.066,0.726,0.662,0.471,0.062,0.253,0.785,0.242,0.179,0.792]"
|
| 29 |
+
Human,eCLIP,ENCSR862QCH_M,"[0.177,0.931,0.556,0.253,0.325,0.421,0.107,0.472,0.601,0.270]"
|
| 30 |
+
Human,eCLIP,ENCSR862QCH_P,"[0.567,0.657,0.240,0.844,0.392,0.685,0.376,0.586,0.009,0.874]"
|
| 31 |
+
Human,RNA-seq,ENCSR527JGN_M,"[0.290,0.982,0.241,0.573,0.275,0.819,0.182,0.872,0.358,0.225]"
|
| 32 |
+
Human,RNA-seq,ENCSR527JGN_P,"[0.390,0.131,0.325,0.252,0.376,0.154,0.383,0.796,0.908,0.018]"
|
| 33 |
+
Human,RNA-seq,ENCSR701YIC,"[0.819,0.494,0.866,0.884,0.795,0.625,0.430,0.194,0.942,0.054]"
|
| 34 |
+
Human,RNA-seq,ENCSR619DQO_M,"[0.416,0.079,0.886,0.028,0.310,0.883,0.393,0.946,0.261,0.709]"
|
| 35 |
+
Human,RNA-seq,ENCSR619DQO_P,"[0.712,0.221,0.052,0.699,0.099,0.213,0.167,0.355,0.209,0.401]"
|
| 36 |
+
Chicken,ATAC-seq,ERX9662174,"[0.970,0.682,0.553,0.337,0.432,0.824,0.736,0.245,0.151,0.220]"
|
| 37 |
+
Chicken,ATAC-seq,ERX9662177,"[0.621,0.160,0.453,0.308,0.119,0.981,0.506,0.906,0.307,0.602]"
|
| 38 |
+
Chicken,ATAC-seq,ERX9662178,"[0.544,0.381,0.943,0.517,0.130,0.982,0.330,0.374,0.081,0.249]"
|
| 39 |
+
Chicken,ATAC-seq,ERX9662183,"[0.705,0.672,0.926,0.499,0.985,0.486,0.840,0.058,0.138,0.402]"
|
| 40 |
+
Chicken,ATAC-seq,ERX9662185,"[0.106,0.599,0.743,0.927,0.004,0.540,0.234,0.200,0.490,0.455]"
|
| 41 |
+
Chicken,ATAC-seq,ERX9662186,"[0.367,0.607,0.281,0.163,0.408,0.589,0.855,0.815,0.799,0.590]"
|
| 42 |
+
Chicken,ATAC-seq,ERX9662180,"[0.576,0.006,0.981,0.060,0.578,0.195,0.165,0.634,0.789,0.233]"
|
| 43 |
+
Chicken,RNA-seq,ERX9662188,"[0.236,0.404,0.687,0.930,0.053,0.112,0.635,0.056,0.131,0.109]"
|
| 44 |
+
Chicken,RNA-seq,ERX9662190,"[0.191,0.503,0.917,0.304,0.014,0.583,0.413,0.580,0.219,0.522]"
|
| 45 |
+
Chicken,RNA-seq,ERX9662192,"[0.026,0.091,0.601,0.767,0.560,0.808,0.243,0.179,0.925,0.379]"
|
| 46 |
+
Chicken,RNA-seq,ERX9662194,"[0.116,0.590,0.266,0.674,0.765,0.495,0.089,0.043,0.513,0.244]"
|
| 47 |
+
Chicken,RNA-seq,ERX9662196,"[0.354,0.857,0.923,0.947,0.704,0.888,0.610,0.376,0.282,0.741]"
|
| 48 |
+
Chicken,RNA-seq,ERX9662198,"[0.122,0.997,0.500,0.750,0.170,0.344,0.260,0.195,0.711,0.406]"
|
| 49 |
+
Chicken,RNA-seq,ERX9662200,"[0.884,0.124,0.168,0.388,0.583,0.663,0.496,0.660,0.048,0.844]"
|
| 50 |
+
Rice,Ribo-seq,SRR13808067,"[0.281,0.802,0.011,0.794,0.410,0.959,0.597,0.247,0.036,0.055]"
|
| 51 |
+
Rice,Ribo-seq,SRR13808068,"[0.821,0.082,0.283,0.190,0.673,0.660,0.910,0.589,0.239,0.468]"
|
| 52 |
+
Rice,Ribo-seq,SRR13808069,"[0.874,0.258,0.766,0.217,0.350,0.426,0.776,0.144,0.929,0.210]"
|
| 53 |
+
Rice,Ribo-seq,SRR13808070,"[0.016,0.417,0.724,0.646,0.571,0.076,0.525,0.522,0.983,0.911]"
|
| 54 |
+
Rice,Ribo-seq,SRR13808072,"[0.146,0.401,0.324,0.814,0.479,0.942,0.909,0.960,0.495,0.467]"
|
| 55 |
+
Rice,Ribo-seq,SRR13808073,"[0.040,0.637,0.977,0.892,0.774,0.418,0.061,0.907,0.384,0.551]"
|
| 56 |
+
Rice,Ribo-seq,SRR13808074,"[0.565,0.197,0.363,0.493,0.160,0.332,0.581,0.834,0.850,0.852]"
|
| 57 |
+
Rice,Ribo-seq,SRR13808075,"[0.595,0.306,0.426,0.823,0.851,0.244,0.450,0.012,0.100,0.404]"
|
| 58 |
+
Rice,Ribo-seq,SRR13808076,"[0.704,0.805,0.244,0.887,0.700,0.524,0.576,0.781,0.633,0.821]"
|
| 59 |
+
Rice,Ribo-seq,SRR13808077,"[0.052,0.298,0.045,0.520,0.834,0.372,0.287,0.923,0.193,0.033]"
|
| 60 |
+
Rice,Ribo-seq,SRR13808078,"[0.889,0.934,0.022,0.732,0.651,0.042,0.601,0.366,0.271,0.982]"
|
| 61 |
+
Rice,Ribo-seq,SRR13808064,"[0.678,0.267,0.069,0.205,0.194,0.302,0.493,0.217,0.418,0.570]"
|
| 62 |
+
Rice,Ribo-seq,SRR13808065,"[0.489,0.130,0.499,0.610,0.064,0.004,0.700,0.189,0.424,0.547]"
|
| 63 |
+
Rice,Ribo-seq,SRR13808066,"[0.511,0.092,0.886,0.340,0.030,0.073,0.355,0.021,0.839,0.384]"
|
| 64 |
+
Maize,Ribo-seq,SRR13808051,"[0.620,0.872,0.972,0.392,0.498,0.671,0.303,0.066,0.506,0.327]"
|
| 65 |
+
Maize,Ribo-seq,SRR13808052,"[0.268,0.581,0.455,0.906,0.656,0.776,0.892,0.872,0.232,0.378]"
|
| 66 |
+
Maize,Ribo-seq,SRR13808056,"[0.786,0.499,0.807,0.657,0.853,0.539,0.276,0.593,0.362,0.359]"
|
| 67 |
+
Maize,Ribo-seq,SRR13808058,"[0.293,0.987,0.781,0.717,0.044,0.145,0.617,0.430,0.358,0.573]"
|
| 68 |
+
Maize,Ribo-seq,SRR13808059,"[0.078,0.279,0.384,0.989,0.802,0.375,0.396,0.461,0.066,0.484]"
|
| 69 |
+
Maize,Ribo-seq,SRR13808061,"[0.016,0.372,0.857,0.610,0.178,0.133,0.579,0.200,0.059,0.173]"
|
| 70 |
+
Maize,Ribo-seq,SRR13808062,"[0.953,0.791,0.948,0.761,0.733,0.992,0.239,0.362,0.976,0.112]"
|
| 71 |
+
Maize,Ribo-seq,SRR13808063,"[0.717,0.289,0.606,0.648,0.963,0.046,0.597,0.485,0.601,0.927]"
|
| 72 |
+
Tomato,ATAC-seq,SRX27799703,"[0.158,0.581,0.186,0.230,0.791,0.101,0.014,0.630,0.712,0.165]"
|
| 73 |
+
Tomato,ATAC-seq,SRX27799731,"[0.614,0.980,0.431,0.648,0.283,0.303,0.967,0.275,0.278,0.336]"
|
| 74 |
+
Tomato,ATAC-seq,SRX27799719,"[0.930,0.199,0.335,0.824,0.410,0.866,0.886,0.164,0.935,0.720]"
|
| 75 |
+
Tomato,ATAC-seq,SRX27799727,"[0.642,0.539,0.221,0.035,0.048,0.136,0.687,0.922,0.974,0.821]"
|
| 76 |
+
Tomato,ATAC-seq,SRX27799722,"[0.305,0.463,0.639,0.774,0.851,0.812,0.476,0.443,0.239,0.197]"
|
| 77 |
+
Tomato,ATAC-seq,SRX27799718,"[0.561,0.584,0.014,0.146,0.447,0.366,0.707,0.871,0.469,0.324]"
|
| 78 |
+
Tomato,ATAC-seq,SRX27799733,"[0.260,0.257,0.157,0.078,0.358,0.448,0.395,0.780,0.622,0.748]"
|
| 79 |
+
Tomato,ATAC-seq,SRX29291448,"[0.444,0.970,0.856,0.004,0.539,0.036,0.317,0.532,0.969,0.972]"
|
| 80 |
+
Tomato,ATAC-seq,SRX29291447,"[0.781,0.433,0.134,0.094,0.658,0.878,0.315,0.770,0.052,0.817]"
|
| 81 |
+
Tomato,ATAC-seq,SRX29291446,"[0.759,0.966,0.641,0.731,0.719,0.880,0.277,0.054,0.277,0.294]"
|
| 82 |
+
Tomato,ATAC-seq,SRX29291445,"[0.625,0.970,0.044,0.236,0.403,0.389,0.847,0.558,0.922,0.819]"
|
| 83 |
+
Tomato,ATAC-seq,SRX29291444,"[0.854,0.242,0.517,0.964,0.880,0.467,0.727,0.467,0.628,0.161]"
|
| 84 |
+
Tomato,ATAC-seq,SRX29291443,"[0.437,0.949,0.847,0.303,0.246,0.285,0.937,0.604,0.802,0.806]"
|
| 85 |
+
Tomato,ATAC-seq,SRX29291442,"[0.579,0.635,0.763,0.634,0.287,0.548,0.914,0.167,0.181,0.703]"
|
| 86 |
+
Tomato,ATAC-seq,SRX29291441,"[0.622,0.530,0.081,0.768,0.473,0.617,0.944,0.122,0.979,0.502]"
|
| 87 |
+
Tomato,ATAC-seq,SRX29291440,"[0.985,0.297,0.126,0.540,0.415,0.520,0.139,0.687,0.189,0.274]"
|
| 88 |
+
Tomato,ATAC-seq,SRX29291439,"[0.138,0.458,0.886,0.864,0.646,0.653,0.311,0.651,0.790,0.886]"
|
| 89 |
+
Tomato,ATAC-seq,SRX29291438,"[0.105,0.258,0.901,0.694,0.369,0.208,0.040,0.538,0.711,0.464]"
|
| 90 |
+
Tomato,ATAC-seq,SRX29291431,"[0.240,0.380,0.422,0.485,0.491,0.700,0.192,0.989,0.527,0.435]"
|
| 91 |
+
Tomato,ATAC-seq,SRX29291430,"[0.302,0.618,0.073,0.107,0.287,0.651,0.585,0.092,0.193,0.848]"
|
| 92 |
+
Arabidopsis,,SRX18063718,"[0.967,0.150,0.374,0.382,0.905,0.827,0.972,0.121,0.326,0.431]"
|
| 93 |
+
Arabidopsis,,SRX9629757,"[0.796,0.147,0.719,0.989,0.616,0.399,0.823,0.056,0.258,0.796]"
|
| 94 |
+
Arabidopsis,,SRX14636807,"[0.040,0.357,0.850,0.730,0.254,0.593,0.704,0.170,0.598,0.640]"
|
| 95 |
+
Arabidopsis,,SRX21812610,"[0.447,0.071,0.487,0.403,0.242,0.104,0.565,0.436,0.458,0.913]"
|
| 96 |
+
Arabidopsis,,SRX20209030,"[0.949,0.649,0.518,0.671,0.232,0.907,0.843,0.929,0.314,0.956]"
|
| 97 |
+
Arabidopsis,,SRX27406559,"[0.278,0.701,0.666,0.573,0.871,0.294,0.713,0.590,0.953,0.712]"
|
| 98 |
+
Arabidopsis,,SRX7819350,"[0.681,0.850,0.103,0.974,0.466,0.860,0.310,0.960,0.004,0.393]"
|
| 99 |
+
Arabidopsis,,SRX26731321,"[0.447,0.114,0.530,0.061,0.321,0.055,0.698,0.249,0.793,0.989]"
|
| 100 |
+
Arabidopsis,,SRX13076012,"[0.853,0.824,0.850,0.426,0.984,0.483,0.071,0.824,0.714,0.091]"
|
| 101 |
+
Arabidopsis,,SRX11037822,"[0.877,0.707,0.165,0.350,0.395,0.784,0.729,0.480,0.616,0.227]"
|
| 102 |
+
Arabidopsis,,SRX15435939,"[0.372,0.857,0.601,0.899,0.333,0.671,0.459,0.611,0.519,0.940]"
|
| 103 |
+
Arabidopsis,,ERX5787106,"[0.137,0.246,0.789,0.451,0.626,0.413,0.608,0.745,0.233,0.975]"
|
| 104 |
+
Arabidopsis,,SRX26096490,"[0.420,0.887,0.408,0.749,0.888,0.150,0.660,0.795,0.282,0.280]"
|
| 105 |
+
Arabidopsis,,SRX12144795,"[0.463,0.706,0.722,0.959,0.595,0.019,0.843,0.444,0.373,0.925]"
|
| 106 |
+
Arabidopsis,,SRX27406577,"[0.548,0.636,0.692,0.437,0.646,0.911,0.879,0.234,0.663,0.982]"
|
| 107 |
+
Arabidopsis,,SRX23584554,"[0.745,0.674,0.711,0.209,0.502,0.331,0.289,0.501,0.184,0.038]"
|
| 108 |
+
Arabidopsis,,SRX9514922,"[0.224,0.486,0.254,0.766,0.440,0.329,0.257,0.503,0.323,0.911]"
|
| 109 |
+
Arabidopsis,,ERX11438964,"[0.263,0.561,0.542,0.038,0.487,0.028,0.341,0.358,0.447,0.510]"
|
| 110 |
+
Arabidopsis,,SRX19866208,"[0.133,0.023,0.062,0.035,0.245,0.281,0.117,0.454,0.285,0.345]"
|
| 111 |
+
Arabidopsis,,DRX593284,"[0.741,0.308,0.507,0.118,0.818,0.823,0.517,0.224,0.480,0.971]"
|
| 112 |
+
Arabidopsis,,SRX10135081,"[0.234,0.015,0.219,0.243,0.567,0.446,0.702,0.127,0.244,0.568]"
|
| 113 |
+
Arabidopsis,,SRX3744342,"[0.445,0.018,0.594,0.522,0.599,0.978,0.891,0.427,0.202,0.199]"
|
| 114 |
+
Arabidopsis,,SRX9674213,"[0.294,0.493,0.407,0.817,0.341,0.948,0.814,0.010,0.985,0.796]"
|
| 115 |
+
Arabidopsis,,SRX19866210,"[0.875,0.338,0.644,0.596,0.855,0.824,0.271,0.106,0.695,0.334]"
|
| 116 |
+
Arabidopsis,,SRX6685858,"[0.629,0.081,0.456,0.235,0.527,0.210,0.546,0.437,0.842,0.898]"
|
| 117 |
+
Arabidopsis,,SRX3529727,"[0.101,0.966,0.940,0.374,0.043,0.537,0.771,0.753,0.082,0.295]"
|
| 118 |
+
Arabidopsis,,SRX18888026,"[0.451,0.104,0.831,0.333,0.525,0.173,0.262,0.199,0.977,0.222]"
|
| 119 |
+
Arabidopsis,,SRX689004,"[0.732,0.967,0.289,0.289,0.979,0.288,0.969,0.320,0.785,0.183]"
|
| 120 |
+
Arabidopsis,,SRX18888040,"[0.536,0.036,0.523,0.152,0.950,0.991,0.616,0.724,0.650,0.235]"
|
| 121 |
+
Arabidopsis,,SRX26650235,"[0.450,0.694,0.877,0.342,0.091,0.545,0.969,0.335,0.951,0.614]"
|
requirements.txt
CHANGED
|
@@ -1,3 +1,17 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
APScheduler
|
| 2 |
+
black
|
| 3 |
+
datasets
|
| 4 |
+
gradio
|
| 5 |
+
gradio[oauth]
|
| 6 |
+
gradio_leaderboard==0.0.9
|
| 7 |
+
gradio_client
|
| 8 |
+
huggingface-hub>=0.18.0
|
| 9 |
+
matplotlib
|
| 10 |
+
numpy
|
| 11 |
pandas
|
| 12 |
+
python-dateutil
|
| 13 |
+
tqdm
|
| 14 |
+
transformers
|
| 15 |
+
tokenizers>=0.15.0
|
| 16 |
+
sentencepiece
|
| 17 |
+
plotly
|
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,336 @@
|
|
| 1 |
-
|
| 2 |
-
import
|
|
|
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import streamlit as st
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"""
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import ast
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
+
import plotly.express as px
|
| 8 |
|
| 9 |
+
# ---------------------------------------------------------------------
|
| 10 |
+
# Configuration
|
| 11 |
+
# ---------------------------------------------------------------------
|
| 12 |
+
|
| 13 |
+
# Plot colour for every benchmarked model.  Insertion order matters: the
# i-th key is the model that the i-th score of each CSV score list is
# attributed to when the lists are expanded.
MODEL_COLORS = {
    "NTv2 500M MS": "#1f77b4",
    "BPNet 6M": "#ff7f0e",
    "SpliceAI 44M": "#2ca02c",
    "PlantCAD2 - Small 88M": "#d62728",
    "Evo2 1b BF16": "#9467bd",
    "NTv3 8M": "#8c564b",
    "NTv3 100M": "#e377c2",
    "NTv3 650M": "#7f7f7f",
    "NTv3 650M - post-trained": "#bcbd22",
}

# Ordered model names, derived from the colour map so that the name list and
# the colour map cannot drift apart when a model is added or renamed.
MODEL_NAMES = list(MODEL_COLORS)

# Date shown under the page title.
_LAST_UPDATED = "Dec 10, 2025"

# Markdown rendered at the top of the page.
_INTRO = """
Simple leaderboard over custom benchmarks.

- **Pearson correlations (multi-assay)**: per-dataset scores across species and models.
- **MCC (bed tracks)**: per-track MCC values across species and models.

Each metric cell in the CSVs is a list of scores (one per model).
We expand this to (Model × Species × Dataset) and aggregate according to your filters.
"""

# Location of the benchmark CSVs, relative to the working directory.
DATA_DIR = "./data"
PEARSON_PATH = os.path.join(DATA_DIR, "bigwig_dataset.csv")
MCC_PATH = os.path.join(DATA_DIR, "bed_dataset.csv")
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ---------------------------------------------------------------------
|
| 54 |
+
# Data loading & preprocessing
|
| 55 |
+
# ---------------------------------------------------------------------
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@st.cache_data
def load_raw_data():
    """Read the two benchmark CSVs and return ``(pearson_df, mcc_df)``.

    Surrounding whitespace is stripped from every column name of both frames.
    The result is memoised by ``st.cache_data``.
    """
    frames = []
    for csv_path in (PEARSON_PATH, MCC_PATH):
        frame = pd.read_csv(csv_path)
        frame.columns = [name.strip() for name in frame.columns]
        frames.append(frame)

    pearson_df, mcc_df = frames
    return pearson_df, mcc_df
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def expand_metric_lists(
    df: pd.DataFrame,
    metric_col: str,
    model_names: "List[str] | None" = None,
    missing_assay_label: str = "Unspecified",
) -> pd.DataFrame:
    """Expand a stringified score list into one row per model.

    Each row of ``df`` stores, in ``metric_col``, the text of a Python list
    such as ``"[0.1, 0.2]"``.  The i-th score is attributed to the i-th model
    name; surplus scores or surplus names are ignored.

    Args:
        df: Frame with ``species`` and ``datasets`` columns, ``metric_col``,
            and optionally ``assay_type``.
        metric_col: Name of the column holding the stringified list.
        model_names: Model names in score order.  Defaults to the module-level
            ``MODEL_NAMES``.
        missing_assay_label: Value written to ``assay_type`` when the source
            cell is empty.  Without a real label such rows carry NaN and are
            dropped by any ``isin``-based assay filter.

    Returns:
        A frame with columns ``species``, ``datasets``, ``Model``, ``Score``
        and, when ``df`` has it, ``assay_type``.  The columns are present even
        when no row could be expanded, so callers can index them safely.
    """
    names = MODEL_NAMES if model_names is None else list(model_names)
    has_assay = "assay_type" in df.columns

    columns = ["species", "datasets", "Model", "Score"]
    if has_assay:
        columns.append("assay_type")

    rows = []
    for _, row in df.iterrows():
        # Only the failures a malformed cell can cause are treated as
        # "skip this row"; anything else is a real error and propagates.
        try:
            values = ast.literal_eval(str(row[metric_col]))
        except (ValueError, TypeError, SyntaxError):
            continue

        if not isinstance(values, (list, tuple)):
            continue

        # Convert only the scores that will actually be used.
        try:
            scores = [float(value) for value in values[: len(names)]]
        except (TypeError, ValueError):
            continue

        if has_assay:
            assay = row["assay_type"]
            if pd.isna(assay):
                assay = missing_assay_label

        for model, score in zip(names, scores):
            new_row = {
                "species": row["species"],
                "datasets": row["datasets"],
                "Model": model,
                "Score": score,
            }
            if has_assay:
                new_row["assay_type"] = assay
            rows.append(new_row)

    return pd.DataFrame(rows, columns=columns)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@st.cache_data
def load_expanded_data():
    """Return the Pearson and MCC tables in long (one row per model) form.

    The raw CSV frames come from ``load_raw_data``; their score-list columns
    (``"pearson correlation"`` and ``"MCC"``) are expanded with
    ``expand_metric_lists``.  The result is memoised by ``st.cache_data``.
    """
    pearson_raw, mcc_raw = load_raw_data()
    return (
        expand_metric_lists(pearson_raw, "pearson correlation"),
        expand_metric_lists(mcc_raw, "MCC"),
    )
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Long-format score tables, loaded once when the script is executed.
_PEARSON_DF, _MCC_DF = load_expanded_data()

# Global sets (the UI narrows these further per benchmark)
_ALL_SPECIES = sorted(
    set(_PEARSON_DF["species"].unique()) | set(_MCC_DF["species"].unique())
)

if "assay_type" in _PEARSON_DF.columns:
    _ALL_ASSAYS = sorted(_PEARSON_DF["assay_type"].dropna().unique())
else:
    _ALL_ASSAYS = []

_ALL_MODELS = list(MODEL_NAMES)

# Registry of selectable benchmarks: the backing table, the metric's display
# label, and whether the table carries an ``assay_type`` column to filter on.
_BENCHMARKS = {
    "Pearson correlations (multi-assay)": {
        "df": _PEARSON_DF,
        "metric_label": "Pearson correlation",
        "has_assay_type": True,
    },
    "MCC (bed tracks)": {
        "df": _MCC_DF,
        "metric_label": "MCC",
        "has_assay_type": False,
    },
}
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# ---------------------------------------------------------------------
|
| 138 |
+
# Computation helpers
|
| 139 |
+
# ---------------------------------------------------------------------
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def filter_base_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Return the rows of one benchmark that match the given selections.

    An empty selection list means "do not filter on this dimension".  The
    assay filter is applied only when the benchmark is flagged with
    ``has_assay_type`` and the table has an ``assay_type`` column; the dataset
    filter only when the table has a ``datasets`` column.  The benchmark's
    stored table is never modified.
    """
    cfg = _BENCHMARKS[benchmark_name]
    frame = cfg["df"].copy()

    assay_applicable = cfg.get("has_assay_type", False) and "assay_type" in frame.columns

    # (column, allowed values, whether the filter is applicable at all),
    # applied in this order: species, assay type, dataset, model.
    criteria = [
        ("species", selected_species, True),
        ("assay_type", selected_assays, assay_applicable),
        ("datasets", selected_datasets, "datasets" in frame.columns),
        ("Model", selected_models, True),
    ]
    for column, allowed, applicable in criteria:
        if applicable and allowed:
            frame = frame[frame[column].isin(allowed)]

    return frame
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def build_leaderboard(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Aggregate the filtered scores into one leaderboard row per model.

    Args:
        benchmark_name: Key into ``_BENCHMARKS``.
        selected_species: Species to keep (empty list = no filter).
        selected_assays: Assay types to keep (empty list = no filter).
        selected_models: Models to keep (empty list = no filter).
        selected_datasets: Datasets to keep (empty list = no filter).

    Returns:
        A frame with columns ``Model``, ``Num entries`` (number of scores that
        were averaged) and ``Mean score`` (rounded to 3 decimals), ordered
        from best to worst mean.  Empty, with the same columns, when nothing
        matches the filters.
    """
    df = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )

    if df.empty:
        return pd.DataFrame(columns=["Model", "Num entries", "Mean score"])

    # One pass over the groups yields both statistics.  Rank on the exact
    # mean and round afterwards, so that models whose means differ by less
    # than the display precision are still ordered correctly.
    agg = (
        df.groupby("Model")["Score"]
        .agg(["count", "mean"])
        .rename(columns={"count": "Num entries", "mean": "Mean score"})
        .sort_values("Mean score", ascending=False)
        .reset_index()
    )
    agg["Mean score"] = agg["Mean score"].round(3)

    return agg[["Model", "Num entries", "Mean score"]]
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def build_bar_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """Return the data behind the bar chart.

    Currently this is exactly the leaderboard table (one bar per model); all
    arguments are forwarded unchanged to ``build_leaderboard``.
    """
    return build_leaderboard(
        benchmark_name=benchmark_name,
        selected_species=selected_species,
        selected_assays=selected_assays,
        selected_models=selected_models,
        selected_datasets=selected_datasets,
    )
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# ---------------------------------------------------------------------
|
| 220 |
+
# Streamlit UI
|
| 221 |
+
# ---------------------------------------------------------------------
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def main():
    """Render the page: sidebar filters, leaderboard table and bar chart.

    Sidebar order is benchmark selector, species toggles, assay-type toggles
    (only for benchmarks flagged ``has_assay_type``), bed-track toggles (only
    for the MCC benchmark) and model toggles.  Every toggle starts switched
    on.  The resulting selections are passed to ``build_leaderboard`` and
    ``build_bar_df``.
    """
    st.set_page_config(page_title="Custom Model Benchmarks", layout="wide")

    st.title("🧬 Custom Model Benchmarks")
    st.markdown(_INTRO)
    st.markdown(f"_Last updated: **{_LAST_UPDATED}**_")

    sidebar = st.sidebar

    def pick(options, key_prefix: str) -> List[str]:
        # One toggle per option, in order; returns the options left switched on.
        return [
            option
            for option in options
            if sidebar.toggle(option, value=True, key=f"{key_prefix}_{option}")
        ]

    # --- Sidebar filters ---
    sidebar.header("Filters")

    benchmark_name = sidebar.selectbox(
        "Benchmark",
        options=list(_BENCHMARKS.keys()),
        index=0,
    )
    cfg = _BENCHMARKS[benchmark_name]
    df_bench = cfg["df"]

    # Species: only those present in the chosen benchmark.
    sidebar.subheader("Species")
    selected_species: List[str] = pick(
        sorted(df_bench["species"].unique()), f"species_{benchmark_name}"
    )

    # The assay and bed-track options are limited to the selected species;
    # with no species selected the whole benchmark table is used.
    if selected_species:
        df_scoped = df_bench[df_bench["species"].isin(selected_species)]
    else:
        df_scoped = df_bench

    # Assay types (Pearson only).
    selected_assays: List[str] = []
    if cfg.get("has_assay_type", False):
        sidebar.subheader("Assay types (Pearson only)")
        if "assay_type" in df_scoped.columns:
            assay_options = sorted(df_scoped["assay_type"].dropna().unique())
        else:
            assay_options = []
        selected_assays = pick(assay_options, f"assay_{benchmark_name}")

    # Bed tracks / datasets (MCC only).
    selected_datasets: List[str] = []
    if benchmark_name == "MCC (bed tracks)":
        sidebar.subheader("Bed tracks (datasets)")
        selected_datasets = pick(
            sorted(df_scoped["datasets"].unique()), f"dataset_{benchmark_name}"
        )

    # Models: always the full list, independent of the benchmark.
    sidebar.subheader("Models")
    selected_models: List[str] = pick(_ALL_MODELS, "model")

    # --- Main content ---
    filters = (
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )
    leaderboard_df = build_leaderboard(*filters)
    bar_df = build_bar_df(*filters)

    table_col, chart_col = st.columns([1, 1])

    with table_col:
        st.subheader("🏅 Leaderboard (per model)")
        if leaderboard_df.empty:
            st.info("No data for the selected filters.")
        else:
            st.dataframe(leaderboard_df, use_container_width=True)

    with chart_col:
        st.subheader("📈 Mean score per model")
        if bar_df.empty:
            st.info("No data for the selected filters.")
        else:
            fig = px.bar(
                bar_df,
                x="Model",
                y="Mean score",
                color="Model",
                color_discrete_map=MODEL_COLORS,
            )
            fig.update_layout(
                barmode="group",
                height=500,
                xaxis_title="Model",
                yaxis_title="Mean score",
                plot_bgcolor="rgba(0,0,0,0)",
                paper_bgcolor="rgba(0,0,0,0)",
            )
            st.plotly_chart(fig, use_container_width=True)
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
main()
|