|
|
# Registry of every dataset this module knows about: short name -> dataset path.
# Most values have the form '<owner>/<name>'.
# NOTE(review): the values look like Hugging Face Hub repo ids - confirm against the loader.
supported_datasets = {
    'EC': 'GleghornLab/EC_reg',
    'GO-CC': 'GleghornLab/CC_reg',
    'GO-BP': 'GleghornLab/BP_reg',
    'GO-MF': 'GleghornLab/MF_reg',
    'MB': 'GleghornLab/MB_reg',
    'DeepLoc-2': 'GleghornLab/DL2_reg',
    'DeepLoc-10': 'GleghornLab/DL10_reg',
    'Subcellular': 'GleghornLab/SL_13',
    'enzyme-kcat': 'GleghornLab/enzyme_kcat',
    'solubility': 'GleghornLab/solubility_prediction',
    'localization': 'GleghornLab/localization_prediction',
    'temperature-stability': 'GleghornLab/temperature_stability',
    'peptide-HLA-MHC-affinity': 'GleghornLab/peptide_HLA_MHC_affinity_ppi',
    'optimal-temperature': 'GleghornLab/optimal_temperature',
    'optimal-ph': 'GleghornLab/optimal_ph',
    'material-production': 'GleghornLab/material_production',
    'fitness-prediction': 'GleghornLab/fitness_prediction',
    'number-of-folds': 'GleghornLab/fold_prediction',
    'cloning-clf': 'GleghornLab/cloning_clf',
    'stability-prediction': 'GleghornLab/stability_prediction',
    'human-ppi-saprot': 'GleghornLab/HPPI',
    'SecondaryStructure-3': 'GleghornLab/SS3',
    'SecondaryStructure-8': 'GleghornLab/SS8',
    'fluorescence-prediction': 'GleghornLab/fluorescence_prediction',
    # Also listed, with the same path, in internal_datasets.
    'plastic': 'GleghornLab/plastic_degradation_benchmark',
    # PPI-named datasets (SHS27k / SHS148k / STRING come in raw, random, dfs and bfs variants).
    'gold-ppi': 'Synthyra/bernett_gold_ppi',
    'human-ppi-pinui': 'GleghornLab/HPPI_PiNUI',
    'yeast-ppi-pinui': 'GleghornLab/YPPI_PiNUI',
    'shs27-ppi-raw': 'Synthyra/SHS27k',
    'shs148-ppi-raw': 'Synthyra/SHS148k',
    'shs27-ppi-random': 'GleghornLab/ppi_SHS27k_random_2025',
    'shs148-ppi-random': 'GleghornLab/ppi_SHS148k_random_2025',
    'shs27-ppi-dfs': 'GleghornLab/ppi_SHS27k_dfs_2025',
    'shs148-ppi-dfs': 'GleghornLab/ppi_SHS148k_dfs_2025',
    'shs27-ppi-bfs': 'GleghornLab/ppi_SHS27k_bfs_2025',
    'shs148-ppi-bfs': 'GleghornLab/ppi_SHS148k_bfs_2025',
    'string-ppi-random': 'GleghornLab/ppi_STRING_random_2025',
    'string-ppi-dfs': 'GleghornLab/ppi_STRING_dfs_2025',
    'string-ppi-bfs': 'GleghornLab/ppi_STRING_bfs_2025',
    'plm-interact': 'GleghornLab/plm_interact_human_train_cross_ppi',
    'ppi-mutation-effect': 'GleghornLab/ppi_mutation_effect',
    'PPA-ppi': 'Synthyra/ppi_affinity',
    # Both foldseek names intentionally(?) point at the same path.
    # NOTE(review): presumably the loader picks different columns per name - confirm.
    'foldseek-fold': 'lhallee/foldseek_dataset',
    'foldseek-inverse': 'lhallee/foldseek_dataset',
    'ec-active': 'lhallee/ec_active',
    'bernett_processed': 'lhallee/bernett_processed',
    # These two values carry no '<owner>/' prefix and equal their key.
    # NOTE(review): presumably special-cased by the loader - confirm.
    'proteingym_zs': 'proteingym_zs',
    'proteingym_supervised': 'proteingym_supervised',
    # Taxonomy datasets, one per rank; all paths end in '_0.4_clusters'.
    'taxon_domain': 'GleghornLab/taxonomy_domain_0.4_clusters',
    'taxon_kingdom': 'GleghornLab/taxonomy_kingdom_0.4_clusters',
    'taxon_phylum': 'GleghornLab/taxonomy_phylum_0.4_clusters',
    'taxon_class': 'GleghornLab/taxonomy_class_0.4_clusters',
    'taxon_order': 'GleghornLab/taxonomy_order_0.4_clusters',
    'taxon_family': 'GleghornLab/taxonomy_family_0.4_clusters',
    'taxon_genus': 'GleghornLab/taxonomy_genus_0.4_clusters',
    'taxon_species': 'GleghornLab/taxonomy_species_0.4_clusters',
    'diff_phylogeny': 'GleghornLab/diff_phylo',
    'plddt': 'GleghornLab/af2_plddt',
    'realness': 'GleghornLab/realness_dataset',
    'million_full': 'GleghornLab/millionfull_round_1_oct_2025',
    'soluprot': 'GleghornLab/soluprot',
    'ecoli_expression': 'GleghornLab/ecoli_expression',
    'KSMoFinder-clustered': 'GleghornLab/ksmo_clustered',
    'KSMoFinder': 'GleghornLab/KSmo_fixed',
}
|
|
# Short name -> dataset path for datasets flagged as "internal".
# The single entry repeats the 'plastic' entry of supported_datasets verbatim.
# NOTE(review): presumably these are not publicly accessible - confirm with the consumer of this dict.
internal_datasets = {
    'plastic': 'GleghornLab/plastic_degradation_benchmark',
}
|
|
| |
# Dataset names (keys of supported_datasets) considered usable with vector representations.
# Every name appears exactly once; a second 'string-ppi-bfs' entry was removed.
# NOTE(review): 'soluprot' is listed in vector_benchmark but not here - confirm whether
# it should be added to this list.
possible_with_vector_reps = [
    'EC',

    'GO-CC',
    'GO-BP',
    'GO-MF',
    'Subcellular',

    # SHS27k / SHS148k / STRING PPI datasets in their random, dfs and bfs variants.
    'shs27-ppi-random',
    'shs27-ppi-dfs',
    'shs27-ppi-bfs',
    'shs148-ppi-random',
    'shs148-ppi-dfs',
    'shs148-ppi-bfs',
    'string-ppi-random',
    'string-ppi-dfs',
    'string-ppi-bfs',

    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'solubility',
    'temperature-stability',
    'material-production',
    'fitness-prediction',
    'number-of-folds',
    'cloning-clf',
    'stability-prediction',
    'ec-active',
    'localization',

    # Taxonomy datasets, one per rank, plus diff_phylogeny.
    'taxon_domain',
    'taxon_kingdom',
    'taxon_phylum',
    'taxon_class',
    'taxon_order',
    'taxon_family',
    'taxon_genus',
    'taxon_species',
    'diff_phylogeny',

    # Remaining PPI-named datasets ('string-ppi-bfs' is already listed above).
    'shs27-ppi-raw',
    'shs148-ppi-raw',
    'plm-interact',
    'gold-ppi',
    'human-ppi-saprot',
    'human-ppi-pinui',
    'yeast-ppi-pinui',

    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'million_full',

    'PPA-ppi',
]
|
|
| |
# Dataset names making up the standard benchmark suite.
# Every name is a key of supported_datasets.
standard_data_benchmark = [
    'ec-active',
    'EC',
    'GO-CC',
    'GO-BP',
    'GO-MF',
    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'fitness-prediction',
]
|
|
|
|
# Dataset names making up the vector benchmark suite.
# Every name is a key of supported_datasets.
# NOTE(review): 'soluprot' is the only entry not also present in
# possible_with_vector_reps - confirm that this is intended.
vector_benchmark = [
    'EC',

    'GO-CC',
    'GO-BP',
    'GO-MF',
    'Subcellular',

    # Only the bfs variant of each SHS27k / SHS148k / STRING PPI dataset is benchmarked.
    'shs27-ppi-bfs',
    'shs148-ppi-bfs',
    'string-ppi-bfs',

    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'solubility',
    'temperature-stability',
    'material-production',
    'fitness-prediction',
    'number-of-folds',
    'cloning-clf',
    'stability-prediction',
    'ec-active',
    'soluprot',

    'taxon_species',
    'diff_phylogeny',

    'plm-interact',
    'gold-ppi',

    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'million_full',

    'PPA-ppi',
]
|
|
|
|
# Small set of dataset names used for testing runs.
# Every name must be a key of supported_datasets. The previous 'human-ppi' entry
# matched no key, so it now uses 'human-ppi-saprot'.
# NOTE(review): 'human-ppi-pinui' is the other candidate - confirm which was intended.
testing = [
    'EC',
    'DeepLoc-2',
    'DeepLoc-10',
    'enzyme-kcat',
    'human-ppi-saprot',
    'plddt',
    'SecondaryStructure-3',
]