# beatalignment/train/midi_preprocess.py
# william590y's picture
# Upload folder using huggingface_hub
# 151b875 verified
import traceback
from argparse import ArgumentParser
from concurrent.futures import ProcessPoolExecutor
from glob import glob
from tqdm import tqdm
from anticipation.convert import midi_to_compound
from anticipation.config import PREPROC_WORKERS
def convert_midi(filename, debug=False):
    """Tokenize one MIDI file and save the tokens alongside it.

    Runs ``midi_to_compound`` on ``filename`` and writes the resulting
    tokens, space-separated, to ``<filename>.compound.txt``.

    Args:
        filename: Path of the MIDI file to convert.
        debug: Forwarded to ``midi_to_compound``. When true, a failed
            conversion also prints the filename and the traceback.

    Returns:
        0 if the token file was written, 1 if ``midi_to_compound`` raised.
    """
    try:
        compound = midi_to_compound(filename, debug=debug)
    except Exception:
        # A file that cannot be converted is reported through the return
        # code instead of raising; details are shown only in debug mode.
        if not debug:
            return 1
        print('Failed to process: ', filename)
        print(traceback.format_exc())
        return 1

    # Tokens are stringified while the output file is already open, so an
    # error here propagates to the caller just as a write failure would.
    with open(f"{filename}.compound.txt", 'w') as out:
        out.write(' '.join(map(str, compound)))

    return 0
def main(args):
    """Convert every MIDI file under ``args.dir`` to a compound-token file.

    Recursively collects ``*.mid`` and ``*.midi`` files below ``args.dir``,
    runs ``convert_midi`` on each one in a pool of ``PREPROC_WORKERS``
    processes, and prints how many files were processed and what
    percentage was discarded.

    Args:
        args: Parsed command-line namespace; only ``args.dir`` (the root
            directory to search) is read.

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    filenames = glob(args.dir + '/**/*.mid', recursive=True) \
        + glob(args.dir + '/**/*.midi', recursive=True)

    # Nothing to do: stop here, otherwise the discard percentage below
    # would divide by zero.
    if not filenames:
        print(f'No .mid or .midi files found under {args.dir}')
        return

    print(f'Preprocessing {len(filenames)} files with {PREPROC_WORKERS} workers')

    with ProcessPoolExecutor(max_workers=PREPROC_WORKERS) as executor:
        results = list(tqdm(executor.map(convert_midi, filenames), desc='Preprocess', total=len(filenames)))

    # convert_midi returns 1 for each failed file and 0 otherwise, so the
    # sum of the results is the number of discarded files.
    failures = sum(results)
    discards = round(100 * failures / len(filenames), 2)
    print(f'Successfully processed {len(filenames) - failures} files (discarded {discards}%)')
if __name__ == '__main__':
    # Script entry point: read the dataset directory from the command line
    # and hand the parsed arguments to main().
    cli = ArgumentParser(description='prepares a MIDI dataset')
    cli.add_argument('dir', help='directory containing .mid files for training')
    parsed = cli.parse_args()
    main(parsed)