from multiprocessing import Pool import copy import argparse from process_pretrain_data import Process # filenames = ['xaa', 'xab', 'xac', 'xad', 'xae', 'xaf', 'xag', 'xah', 'xai', 'xaj', 'xak', 'xal', 'xam', 'xan', 'xao', 'xap', 'xaq', 'xar', 'xas', 'xat', 'xau', 'xav', 'xaw'] # filenames = ['xaa', 'xab'] def main(): parser = argparse.ArgumentParser() parser.add_argument( "--sampling_rate", default=1.0, type=float, help="We will sample sampling_rate*total_length*2/512 times", ) parser.add_argument( "--kmer", default=1, type=int, help="K-mer", ) parser.add_argument( "--length", default=10000, type=int, help="Length of the sampled sequence", ) parser.add_argument( "--file_path", default=None, type=str, help="The path of the file to be processed", ) parser.add_argument( "--output_path", default="/home/zhihan/dna/data/split/", type=str, help="The path of the file to be processed", ) args = parser.parse_args() # multiprocess p = Pool(22) for i in range(1,23): arg_new = copy.deepcopy(args) arg_new.file_path = "/root/data/genome/" + "GRCh38.chr" + str(i) + ".fa" arg_new.output_path = "/root/data/sub_001_6140/" + "GRCh38.chr" + str(i) + ".fa" # arg_new.file_path = arg_new.output_path + filename p.apply_async(Process, args=(arg_new,)) p.close() p.join() if __name__ == "__main__": main()