"""Prepare a MIDI dataset: convert every MIDI file under a directory to compound tokens."""

import traceback
from argparse import ArgumentParser
from concurrent.futures import ProcessPoolExecutor
from glob import glob

from tqdm import tqdm

from anticipation.convert import midi_to_compound
from anticipation.config import PREPROC_WORKERS


def convert_midi(filename, debug=False):
    """Convert one MIDI file and write its tokens next to it.

    Calls ``midi_to_compound(filename, debug=debug)`` and writes the
    resulting tokens, space-separated, to ``"<filename>.compound.txt"``.

    Args:
        filename: Path of the MIDI file to convert.
        debug: When true, print the filename and traceback of a failed
            conversion; it is also forwarded to ``midi_to_compound``.

    Returns:
        0 if the file was converted and written, 1 if the conversion
        raised an exception (the file is then skipped, nothing is written).
    """
    try:
        tokens = midi_to_compound(filename, debug=debug)
    except Exception:
        # Best-effort: a file that cannot be converted is counted as a
        # discard instead of aborting the whole preprocessing run.
        if debug:
            print('Failed to process: ', filename)
            print(traceback.format_exc())

        return 1

    with open(f"{filename}.compound.txt", 'w') as f:
        f.write(' '.join(str(tok) for tok in tokens))

    return 0


def main(args):
    """Convert all ``.mid``/``.midi`` files found recursively under ``args.dir``.

    Files are processed in a ``ProcessPoolExecutor`` with
    ``PREPROC_WORKERS`` workers, and a summary of successes and the
    percentage of discarded files is printed at the end.

    Args:
        args: Parsed command-line arguments; only ``args.dir`` is read.
    """
    filenames = (
        glob(args.dir + '/**/*.mid', recursive=True)
        + glob(args.dir + '/**/*.midi', recursive=True)
    )

    print(f'Preprocessing {len(filenames)} files with {PREPROC_WORKERS} workers')
    with ProcessPoolExecutor(max_workers=PREPROC_WORKERS) as executor:
        # executor.map passes only the filename, so convert_midi runs with
        # its default debug=False in the workers.
        results = list(tqdm(
            executor.map(convert_midi, filenames),
            desc='Preprocess',
            total=len(filenames)))

    # Each result is 1 for a discarded file and 0 for a success.
    failures = sum(results)

    # Guard against an empty file list (e.g. wrong or empty directory),
    # which would otherwise raise ZeroDivisionError here.
    if filenames:
        discards = round(100 * failures / float(len(filenames)), 2)
    else:
        discards = 0.0

    print(f'Successfully processed {len(filenames) - failures} files (discarded {discards}%)')


if __name__ == '__main__':
    parser = ArgumentParser(description='prepares a MIDI dataset')
    parser.add_argument('dir', help='directory containing .mid files for training')
    main(parser.parse_args())