import os
import os.path
import warnings
from typing import Final, Optional


def _dir_from_env(var_name: str) -> str:
    """
    Return the directory stored in the environment variable var_name.

    If the variable is not set, the current working directory is returned and a RuntimeWarning is emitted.
    Without this fallback os.getenv returns None and the os.path.join calls in GlobalParameters fail with a
    TypeError while the class body is being executed (i.e. at import time).

    Args:
        var_name (str): name of the environment variable to read

    Returns:
        str: value of the environment variable, or the current working directory if it is unset
    """
    directory = os.getenv(var_name)
    if directory is None:
        fallback = os.getcwd()
        warnings.warn(
            f"Environment variable {var_name} is not set; using current working directory '{fallback}' instead.",
            RuntimeWarning, stacklevel=2)
        return fallback
    return directory


class GlobalParameters:
    """
    Class to store global parameters

    All parameters are class attributes that are evaluated once, when the class body is executed.

    Attributes:
        base_dir (str): base directory of project files, read from environment variable NEORANKING_RESOURCE
                        (current working directory, with a warning, if the variable is not set)
        code_dir (str): directory read from environment variable NEORANKING_CODE
                        (current working directory, with a warning, if the variable is not set)
        data_dir (str): directory that holds data files
        plot_dir (str): directory that holds figure files
        classifier_result_dir (str): directory that holds classifier result files
        classifier_model_dir (str): directory that holds classifier model files
        neopep_data_org_file (str): tab file containing all neo-peptide data
        mutation_data_org_file (str): tab file containing all mutation data
        neopep_data_ml_sel_file (str): tab file containing rows of neo-peptide data selected for ML
        mutation_data_ml_sel_file (str): tab file containing rows of mutation data selected for ML
        neopep_data_ml_file (str): tab file containing neo-peptide data normalized for ML
        mutation_data_ml_file (str): tab file containing mutation data normalized for ML
        neopep_data_plot_file (str): tab file containing neo-peptide data normalized for histogram and scatter plots
        mutation_data_plot_file (str): tab file containing mutation data normalized for histogram and scatter plots
        cat_to_num_info_files (dict[str, dict[str, str]]): dictionary with file names for imputation of categorical
                                                           variables, keyed by peptide type and then by dataset
        tesla_result_file (str): results from TESLA paper containing FR, TTIF, and AUPRC scores of different groups
        gartner_nmer_train_file (str): training data matrix from Gartner et al with mutation features and immunogenicity
                                       annotation downloaded from figshare link provided in Gartner et al
        gartner_nmer_test_file (str): testing data matrix from Gartner et al with mutation features and immunogenicity
                                       annotation downloaded from figshare link provided in Gartner et al
        gartner_nmer_rank_file (str): file containing the ranking of mutations in NCI_test obtained by Gartner et al.
        hlaI_allele_file (str): file containing the HLA class I alleles of all patients
        datasets (list[str]): datasets used in this study  ['NCI', 'NCI_train', 'NCI_test', 'TESLA', 'HiTIDE']
        datasets_encoding (list[str]): datasets used for encoding categorical values  ['NCI', 'NCI_train']
        peptide_types (list[str]): peptide types ['neopep', 'mutation']
        objectives (list[str]): objectives for data normalization ['ml', 'plot']
        response_types (list[str]): immunogenicity measurement response types ['CD8', 'negative', 'not_tested']
        mutation_types (list[str]): mutation types to include ['SNV', 'INSERTION', 'DELETION', 'FSS']
        classifiers (list[str]): classifiers used in this study
        aas (list[str]): list of amino acids
        ml_features_neopep (list[str]): list of features used for classification of neo-peptides
        features_neopep (list[str]): list of features for neo-peptides (annotation columns + ml_features_neopep)
        feature_types_neopep (dict[str, str]): types of features_neopep
        ml_feature_mv_neopep (dict[str, str]): order of features_neopep values (used for missing value imputation)
        ml_features_mutation (list[str]): list of features used for classification of mutations
        features_mutation (list[str]): list of features for mutations (annotation columns + ml_features_mutation)
        feature_types_mutation (dict[str, str]): types of features_mutation
        ml_feature_mv_mutation (dict[str, str | float]): order of features_mutation values (used for missing value
                                                         imputation). Values are 'min', 'max', 'cnt' or a number
        nr_hyperopt_rep (int): number of replicate hyperopt runs
        nr_hyperopt_iter (int): number of hyperopt iterations
        nr_hyperopt_cv (int): number of hyperopt cross-validation folds
        neopep_alpha (float): value of alpha in rank_score function used for training neo-peptides
        mutation_alpha (float): value of alpha in rank_score function used for training mutations
        normalizer (str): normalizer to be used ('q': quantile, 'p': power, 'z': standard, 'i': minmax, 'l': log, 'a': asinh, 'n': none)
        nr_non_immuno_neopeps (int): nr non-immunogenic peptides sampled
        cat_type (str): conversion of categorical to numerical values. either 'float' or 'int'
        max_netmhc_rank (float): maximal netmhc rank for neo-peptide. -1 if no filter applied
        excluded_genes (list): peptides of these genes are excluded from prioritization
        plot_normalization (dict): feature normalization for plots only (not for ML)
        plot_feature_names (dict): feature names used in plots
        color_immunogenic (str): color used to represent immunogenic peptides in plots
        color_negative (str): color used to represent non-immunogenic peptides in plots
        plot_file_formats (list[str]): list of figure file formats ['pdf', 'svg', 'png']
    """

    # Root directories come from the environment; every other path below is derived from them.
    base_dir: Final[str] = _dir_from_env('NEORANKING_RESOURCE')
    code_dir: Final[str] = _dir_from_env('NEORANKING_CODE')
    data_dir: Final[str] = os.path.join(base_dir, "data")
    plot_dir: Final[str] = os.path.join(base_dir, "plots")
    classifier_result_dir: Final[str] = os.path.join(base_dir, "classifier_results")
    classifier_model_dir: Final[str] = os.path.join(base_dir, "classifier_models")

    neopep_data_org_file: Final[str] = os.path.join(data_dir, "Neopep_data_org.txt")
    mutation_data_org_file: Final[str] = os.path.join(data_dir, "Mutation_data_org.txt")
    neopep_data_ml_sel_file: Final[str] = os.path.join(data_dir, "Neopep_data_ml_sel.txt")
    mutation_data_ml_sel_file: Final[str] = os.path.join(data_dir, "Mutation_data_ml_sel.txt")
    neopep_data_ml_file: Final[str] = os.path.join(data_dir, "Neopep_data_ml_norm.txt")
    mutation_data_ml_file: Final[str] = os.path.join(data_dir, "Mutation_data_ml_norm.txt")
    neopep_data_plot_file: Final[str] = os.path.join(data_dir, "Neopep_data_plot_norm.txt")
    mutation_data_plot_file: Final[str] = os.path.join(data_dir, "Mutation_data_plot_norm.txt")

    # Outer key: peptide type, inner key: dataset (the entries of datasets_encoding)
    cat_to_num_info_files: Final[dict] = \
        {
            'neopep': {'NCI_train': os.path.join(data_dir, 'cat_encoding', 'Cat_to_num_info_neopep_NCI_train.txt'),
                       'NCI': os.path.join(data_dir, 'cat_encoding', 'Cat_to_num_info_neopep_NCI_all.txt')},
            'mutation': {'NCI_train': os.path.join(data_dir, 'cat_encoding', 'Cat_to_num_info_mutation_NCI_train.txt'),
                         'NCI': os.path.join(data_dir, 'cat_encoding', 'Cat_to_num_info_mutation_NCI_all.txt')}
        }

    tesla_result_file: Final[str] = os.path.join(data_dir, "mmc5.xlsx")
    gartner_nmer_train_file: Final[str] = os.path.join(data_dir, 'NmersTrainingSet.txt')
    gartner_nmer_test_file: Final[str] = os.path.join(data_dir, 'NmersTestingSet.txt')
    # Path components are passed separately so that os.path.join picks the platform separator
    gartner_nmer_rank_file: Final[str] = os.path.join(code_dir, 'Data', 'Gartner_nmers_ranking.txt')
    hlaI_allele_file: Final[str] = os.path.join(data_dir, 'hla', 'HLA_allotypes.txt')

    datasets: Final[list] = ['NCI', 'NCI_train', 'NCI_test', 'TESLA', 'HiTIDE']
    datasets_encoding: Final[list] = ['NCI', 'NCI_train']
    peptide_types: Final[list] = ['neopep', 'mutation']
    objectives: Final[list] = ['ml', 'plot']
    response_types: Final[list] = ['CD8', 'negative', 'not_tested']
    mutation_types: Final[list] = ['SNV', 'INSERTION', 'DELETION', 'FSS']

    aas: Final[list] = \
        ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

    classifiers: list = ['SVM', 'SVM-lin', 'RF', 'CART', 'ADA', 'LR', 'NNN', 'XGBoost']
    neopep_alpha: Final[float] = 0.005
    mutation_alpha: Final[float] = 0.05
    nr_hyperopt_rep: int = 10
    nr_hyperopt_iter: int = 200
    nr_hyperopt_cv: int = 5
    normalizer: Final[str] = 'n'
    nr_non_immuno_neopeps: Final[int] = 500000
    cat_type: Final[str] = 'float'  # either float or int
    max_netmhc_rank: Final[float] = 20  # -1 if no filter applied

    excluded_genes: Final[list] = ['HLA-A', 'HLA-B', 'HLA-C', 'HLA-DRB1', 'HLA-DRB3', 'HLA-DRB4', 'HLA-DRB5',
                                   'HLA-DPA1', 'HLA-DPB1', 'HLA-DQA1', 'HLA-DQB1', 'HLA-DMA', 'TRBV3', 'TRBV5',
                                   'TRBV6', 'TRBV6-1', 'TRBV10', 'TRBV10-1', 'TRBV11', 'TRAV12', 'KRT1', 'PRSS3']
    # Neo Test
    ml_features_neopep: Final[list] = \
        [
         'mutant_other_significant_alleles', 'mutant_rank', 'mutant_rank_PRIME',
         'mutant_rank_netMHCpan',
         'mut_Rank_Stab', 'mut_netchop_score_ct',
         'TAP_score',
         'seq_len']

    # Annotation columns followed by the ML features
    features_neopep: Final[list] = \
        ['patient', 'dataset', 'train_test', 'response_type', 'Nb_Samples', 'Sample_Tissue', 'Cancer_Type',
         'chromosome', 'genomic_coord', 'ref', 'alt', 'gene', 'protein_coord', 'aa_mutant', 'aa_wt',
         'pep_mut_start', 'TumorContent', 'Zygosity', 'mutation_type'] + ml_features_neopep

    # Neo Test
    feature_types_neopep: Final[dict] = {
        'patient': 'str',
        'dataset': 'category',
        'train_test': 'category',
        'response_type': 'category',
        'Nb_Samples': 'str',
        'Sample_Tissue': 'str',
        'Cancer_Type': 'str',
        'chromosome': 'str',
        'genomic_coord': 'int64',
        'ref': 'str',
        'alt': 'str',
        'gene': 'str',
        'protein_coord': 'int32',
        'aa_mutant': 'category',
        'aa_wt': 'category',
        'mutant_seq': 'str',
        'wt_seq': 'str',
        'pep_mut_start': 'int8',
        'TumorContent': 'float64',
        'Zygosity': 'category',
        'mutation_type': 'category',
        'mutant_rank': 'float64',
        'mutant_rank_netMHCpan': 'float64',
        'mutant_rank_PRIME': 'float64',
        'mut_Rank_Stab': 'float64',
        'TAP_score': 'float64',
        'mut_netchop_score_ct': 'float64',
        'mutant_other_significant_alleles': 'int8',
        'seq_len': 'category'
    }

    # Neo Test
    ml_feature_mv_neopep: Final[dict] = {
        'mutant_rank': 'max',
        'mutant_rank_netMHCpan': 'max',
        'mutant_rank_PRIME': 'max',
        'mut_Rank_Stab': 'max',
        'TAP_score': 'min',
        'mut_netchop_score_ct': 'min',
        'mutant_other_significant_alleles': 'min',
    }

    ml_features_mutation: Final[list] = \
        ['CCF', 'Clonality', 'Zygosity', 'Sample_Tissue_expression_GTEx',
         'TCGA_Cancer_expression', 'rnaseq_TPM', 'rnaseq_alt_support',
         'MIN_MUT_RANK_CI_MIXMHC', 'COUNT_MUT_RANK_CI_MIXMHC',
         'WT_BEST_RANK_CI_MIXMHC', 'MIN_MUT_RANK_CI_PRIME',
         'COUNT_MUT_RANK_CI_PRIME', 'WT_BEST_RANK_CI_PRIME',
         'COUNT_MUT_RANK_CI_netMHCpan', 'CSCAPE_score', 'gene_driver_Intogen',
         'nb_mutations_in_gene_Intogen', 'nb_same_mutation_Intogen',
         'mutation_driver_statement_Intogen', 'GTEx_all_tissues_expression_mean',
         'bestWTMatchScore_I', 'bestWTMatchOverlap_I', 'bestMutationScore_I',
         'bestWTPeptideCount_I', 'mut_Rank_EL_0', 'wt_Rank_EL_0',
         'mut_Rank_EL_1', 'wt_Rank_EL_1', 'mut_Rank_EL_2', 'wt_Rank_EL_2',
         'mut_Rank_Stab_0', 'mut_Rank_Stab_1', 'mut_Rank_Stab_2',
         'mut_netchop_score', 'mut_TAP_score_0', 'next_best_BA_mut_ranks',
         'DAI_0', 'DAI_1', 'DAI_2']

    # Annotation columns followed by the ML features
    features_mutation: Final[list] = \
        ['patient', 'dataset', 'train_test', 'response_type', 'Nb_Samples', 'Sample_Tissue', 'Cancer_Type',
         'chromosome', 'genomic_coord', 'ref', 'alt', 'gene', 'protein_coord', 'aa_mutant', 'aa_wt', 'pep_mut_start',
         'TumorContent', 'mutation_type'] + ml_features_mutation

    feature_types_mutation: Final[dict] = {
        'patient': 'category',
        'dataset': 'category',
        'train_test': 'category',
        'response_type': 'category',
        'Nb_Samples': 'str',
        'Sample_Tissue': 'str',
        'Cancer_Type': 'str',
        'chromosome': 'str',
        'genomic_coord': 'int64',
        'ref': 'str',
        'alt': 'str',
        'gene': 'str',
        'protein_coord': 'int32',
        'aa_mutant': 'category',
        'aa_wt': 'category',
        'mutant_seq': 'str',
        'wt_seq': 'str',
        'pep_mut_start': 'int8',
        'TumorContent': 'float64',
        'CCF': 'float64',
        'Clonality': 'category',
        'Zygosity': 'category',
        'mutation_type': 'category',
        'nb_same_mutation_Intogen': 'float64',
        'nb_mutations_in_gene_Intogen': 'float64',
        'mutation_driver_statement_Intogen': 'category',
        'gene_driver_Intogen': 'category',
        'rnaseq_TPM': 'float64',
        'TCGA_Cancer_expression': 'float64',
        'bestMutationScore_I': 'float64',
        'bestWTPeptideCount_I': 'int32',
        'bestWTMatchScore_I': 'float64',
        'bestWTMatchOverlap_I': 'float64',
        'rnaseq_alt_support': 'float64',
        'CSCAPE_score': 'float64',
        'GTEx_all_tissues_expression_mean': 'float64',
        'Sample_Tissue_expression_GTEx': 'float64',
        'COUNT_MUT_RANK_CI_MIXMHC': 'int32',
        'COUNT_MUT_RANK_CI_PRIME': 'int32',
        'COUNT_MUT_RANK_CI_netMHCpan': 'int32',
        'MIN_MUT_RANK_CI_MIXMHC': 'float64',
        'WT_BEST_RANK_CI_MIXMHC': 'float64',
        'MIN_MUT_RANK_CI_PRIME': 'float64',
        'WT_BEST_RANK_CI_PRIME': 'float64',
        'next_best_BA_mut_ranks': 'float64',
        'mut_Rank_EL_0': 'float64',
        'mut_Rank_EL_1': 'float64',
        'mut_Rank_EL_2': 'float64',
        'wt_Rank_EL_0': 'float64',
        'wt_Rank_EL_1': 'float64',
        'wt_Rank_EL_2': 'float64',
        'mut_Rank_Stab_0': 'float64',
        'mut_Rank_Stab_1': 'float64',
        'mut_Rank_Stab_2': 'float64',
        'DAI_0': 'float64',
        'DAI_1': 'float64',
        'DAI_2': 'float64',
        'mut_TAP_score_0': 'float64',
        'mut_netchop_score': 'float64'
    }

    # Values are 'min', 'max' or 'cnt', except for CCF which maps to the number 0.9
    ml_feature_mv_mutation: Final[dict] = {
        'nb_same_mutation_Intogen': 'min',
        'nb_mutations_in_gene_Intogen': 'min',
        'rnaseq_TPM': 'min',
        'TCGA_Cancer_expression': 'min',
        'bestMutationScore_I': 'min',
        'bestWTPeptideCount_I': 'min',
        'bestWTMatchScore_I': 'min',
        'bestWTMatchOverlap_I': 'min',
        'rnaseq_alt_support': 'min',
        'CCF': 0.9,
        'CSCAPE_score': 'min',
        'GTEx_all_tissues_expression_mean': 'min',
        'Sample_Tissue_expression_GTEx': 'min',
        'COUNT_MUT_RANK_CI_MIXMHC': 'min',
        'COUNT_MUT_RANK_CI_PRIME': 'min',
        'COUNT_MUT_RANK_CI_netMHCpan': 'min',
        'MIN_MUT_RANK_CI_MIXMHC': 'max',
        'WT_BEST_RANK_CI_MIXMHC': 'max',
        'MIN_MUT_RANK_CI_PRIME': 'max',
        'WT_BEST_RANK_CI_PRIME': 'max',
        'next_best_BA_mut_ranks': 'max',
        'mut_Rank_EL_0': 'max',
        'mut_Rank_EL_1': 'max',
        'mut_Rank_EL_2': 'max',
        'wt_Rank_EL_0': 'max',
        'wt_Rank_EL_1': 'max',
        'wt_Rank_EL_2': 'max',
        'mut_Rank_Stab_0': 'max',
        'mut_Rank_Stab_1': 'max',
        'mut_Rank_Stab_2': 'max',
        'DAI_0': 'cnt',
        'DAI_1': 'cnt',
        'DAI_2': 'cnt',
        'mut_TAP_score_0': 'min',
        'mut_netchop_score': 'min'
    }

    #
    # Visualization
    #
    color_immunogenic: str = 'darkorange'
    color_negative: str = 'royalblue'
    plot_file_formats: list = ['pdf', 'svg', 'png']

    # Values are normalizer codes as listed for the normalizer attribute ('l': log, 'a': asinh, 'i': minmax, 'n': none)
    plot_normalization: Final[dict] = \
        {'mutant_rank_PRIME': 'l', 'wt_best_rank_PRIME': 'l', 'mutant_rank': 'l', 'wt_best_rank': 'l',
         'mutant_rank_netMHCpan': 'l', 'wt_best_rank_netMHCpan': 'l', 'mut_Rank_Stab': 'l', 'wt_Rank_Stab': 'l',
         'mut_Stab_Score': 'n', 'wt_Stab_Score': 'n', 'TAP_score': 'n', 'mut_netchop_score_ct': 'n',
         'mut_binding_score': 'n', 'mut_is_binding_pos': 'n', 'pep_mut_start': 'i', 'mut_aa_coeff': 'n', 'DAI': 'n',
         'rnaseq_TPM': 'a', 'rnaseq_alt_support': 'n', 'GTEx_all_tissues_expression_mean': 'a',
         'Sample_Tissue_expression_GTEx': 'a', 'TCGA_Cancer_expression': 'a', 'bestWTMatchScore_I': 'a',
         'bestWTMatchOverlap_I': 'n', 'bestMutationScore_I': 'a', 'bestWTPeptideCount_I': 'a', 'bestWTMatchType_I': 'n',
         'mutant_other_significant_alleles': 'n', 'CSCAPE_score': 'n', 'Clonality': 'n',
         'CCF': 'n', 'nb_same_mutation_Intogen': 'a', 'nb_mutations_in_gene_Intogen': 'a',
         'nb_mutations_same_position_Intogen': 'a', 'mutation_driver_statement_Intogen': 'n',
         'gene_driver_Intogen': 'n', 'DAI_NetMHC': 'n', 'DAI_MixMHC': 'n', 'DAI_NetStab': 'n',
         'DAI_MixMHC_mbp': 'n', 'seq_len': 'n', 'DAI_aa_coeff': 'n', 'mut_Rank_EL_0': 'l',
         'mut_Rank_EL_1': 'l', 'mut_Rank_EL_2': 'l', 'wt_Rank_EL_0': 'l', 'wt_Rank_EL_1': 'l', 'wt_Rank_EL_2': 'l',
         'mut_Rank_Stab_0': 'l', 'mut_Rank_Stab_1': 'l', 'mut_Rank_Stab_2': 'l', 'DAI_0': 'n', 'DAI_1': 'n',
         'DAI_2': 'n', 'mut_TAP_score_0': 'n', 'mut_netchop_score': 'n', 'COUNT_MUT_RANK_CI_MIXMHC': 'n',
         'COUNT_MUT_RANK_CI_PRIME': 'n', 'COUNT_MUT_RANK_CI_netMHCpan': 'n', 'mut_nr_strong_binders_0': 'n',
         'mut_nr_weak_binding_alleles_0': 'n', 'MIN_MUT_RANK_CI_MIXMHC': 'l', 'WT_BEST_RANK_CI_MIXMHC': 'l',
         'MIN_MUT_RANK_CI_PRIME': 'l', 'WT_BEST_RANK_CI_PRIME': 'l', 'next_best_BA_mut_ranks': 'l'
         }

    plot_feature_names: Final[dict] = \
        {'mutant_rank': 'MixMHCpred Rank', 'mutant_rank_netMHCpan': 'NetMHCpan Rank', 'mutant_rank_PRIME': 'PRIME Rank',
         'mut_Rank_Stab': 'NetStab Rank', 'TAP_score': 'NetTAP Score', 'mut_netchop_score_ct': 'NetChop CT Score',
         'mut_binding_score': 'MixMHCpred Score at Mutation', 'mut_is_binding_pos': 'Mutation at Anchor',
         'pep_mut_start': 'Mutation Position', 'mut_aa_coeff': 'PRIME Coeff at Mutation',
         'DAI_NetMHC': 'NetMHCpan log_Rank DAI', 'DAI_MixMHC': 'MixMHCpred log_Rank DAI',
         'DAI_NetStab': 'NetStab log_Rank DAI', 'mutant_other_significant_alleles': 'Number Binding Alleles',
         'DAI_MixMHC_mbp': 'MixMHCpred Score DAI', 'rnaseq_TPM': 'RNAseq Expression(TPM)',
         'rnaseq_alt_support': 'RNAseq Mutation Coverage',
         'GTEx_all_tissues_expression_mean': 'GTEx Mean Tissue Expression',
         'Sample_Tissue_expression_GTEx': 'GTEx Sample Tissue Expression',
         'TCGA_Cancer_expression': 'TCGA Cancer Expression',
         'bestWTMatchScore_I': 'ipMSDB Peptide Score', 'bestWTMatchOverlap_I': 'ipMSDB Peptide Overlap',
         'bestMutationScore_I': 'ipMSDB Mutation Score', 'bestWTPeptideCount_I': 'ipMSDB Peptide Count',
         'bestWTMatchType_I': 'ipMSDB Peptide Match Type', 'CSCAPE_score': 'CSCAPE Score', 'Zygosity': 'Zygosity',
         'Clonality': 'Clonality', 'CCF': 'Cancer Cell Fraction',
         'nb_same_mutation_Intogen': 'Intogen Same Mutation Count',
         'nb_mutations_in_gene_Intogen': 'Intogen Gene Mutation Count',
         'nb_mutations_same_position_Intogen': 'Intogen Mutation Same Position Count',
         'mutation_driver_statement_Intogen': 'Intogen Mutation Driver Statement',
         'gene_driver_Intogen': 'Gene Driver Intogen', 'pep_mut_start_9': 'Mutation Position Length 9',
         'pep_mut_start_10': 'Mutation Position Length 10', 'pep_mut_start_11': 'Mutation Position Length 11',
         'pep_mut_start_12': 'Mutation Position Length 12', 'seq_len': 'Peptide Length',
         'DAI_aa_coeff': 'PRIME Coefficient DAI', 'COUNT_MUT_RANK_CI_MIXMHC': 'MixMHCpred Binding Peptide Count',
         'COUNT_MUT_RANK_CI_PRIME': 'PRIME Binding Peptide Count',
         'COUNT_MUT_RANK_CI_netMHCpan': 'NetMHC Binding Peptide Count',
         'MIN_MUT_RANK_CI_MIXMHC': 'Minimal Mut MixMHCpred Rank', 'MIN_MUT_RANK_CI_PRIME': 'Minimal Mut PRIME Rank',
         'WT_BEST_RANK_CI_MIXMHC': 'Minimal WT MixMHCpred Rank', 'WT_BEST_RANK_CI_PRIME': 'Minimal WT PRIME Rank',
         'next_best_BA_mut_ranks': 'Second Mut BA rank', 'mut_Rank_EL_0': 'Best Mut EL Rank',
         'mut_Rank_EL_1': 'Second Mut EL Rank', 'mut_Rank_EL_2': 'Third Mut EL Rank', 'wt_Rank_EL_0': 'Best WT EL Rank',
         'wt_Rank_EL_1': 'Second WT EL Rank', 'wt_Rank_EL_2': 'Third WT EL Rank',
         'mut_Rank_Stab_0': 'Best Mut Stab Rank',
         'mut_Rank_Stab_1': 'Second Mut Stab Rank', 'mut_Rank_Stab_2': 'Third Mut Stab Rank',
         'DAI_0': 'BEST EL Rank DAI',
         'DAI_1': 'Second EL Rank DAI', 'DAI_2': 'Third EL Rank DAI', 'mut_TAP_score_0': 'Best Mut TAP Score',
         'mut_netchop_score': 'Best Mut NetChop Score'
         }

    @staticmethod
    def get_cat_to_num_info_file(dataset: str, peptide_type: str) -> Optional[str]:
        """
        Look up the file name stored in cat_to_num_info_files for a dataset and peptide type.

        Args:
            dataset (str): dataset id. Only datasets listed in datasets_encoding have a file
            peptide_type (str): key of cat_to_num_info_files ('neopep' or 'mutation')

        Returns:
            str or None: file name from cat_to_num_info_files, or None if dataset is not in datasets_encoding

        Raises:
            KeyError: if dataset is in datasets_encoding but peptide_type is not a key of cat_to_num_info_files
        """
        if dataset not in GlobalParameters.datasets_encoding:
            return None
        return GlobalParameters.cat_to_num_info_files[peptide_type][dataset]