# Tokenize and binarize the dataset for every target architecture.
#
# For each architecture in ARCHS this script:
#   1. creates the output directory ${TOK}/<arch>,
#   2. runs the BPE tokenizer step on ${DS}/<arch>, writing into ${TOK}/<arch>,
#   3. copies the <split>.eqn files next to the tokenizer output,
#   4. runs fairseq-preprocess (source lang "asm", target lang "eqn") on the
#      train/valid/test prefixes, writing the result into ${TOK}/<arch>.
#
# Requires bash (arrays), python3 and fairseq-preprocess on PATH.

# Abort on the first failing step: running fairseq-preprocess after a failed
# tokenizer run or copy would only process missing or stale inputs.
set -euo pipefail

DS=dataset/
TOK=tokenized/
ARCHS=(arm32 aarch64 x64)
SPLITS=(train valid test)

for arch in "${ARCHS[@]}"; do
  # Strip the trailing slash before joining so paths do not contain "//".
  src_dir="${DS%/}/${arch}"
  out_dir="${TOK%/}/${arch}"

  mkdir -p -- "${out_dir}"

  # Train the BPE tokenizer
  # NOTE(review): if remend.bpe is a Python module rather than a file path,
  # this needs to be `python3 -m remend.bpe` -- confirm against the repo.
  python3 remend.bpe -i "${src_dir}" -o "${out_dir}"

  # The equation side is copied over unchanged for each split.
  for split in "${SPLITS[@]}"; do
    cp -- "${src_dir}/${split}.eqn" "${out_dir}/"
  done

  # Presumably the matching <split>.asm files were written into ${out_dir}
  # by the tokenizer step above -- verify.
  fairseq-preprocess -s asm -t eqn \
    --trainpref "${out_dir}/train" \
    --testpref "${out_dir}/test" \
    --validpref "${out_dir}/valid" \
    --destdir "${out_dir}"
done