# -*- makefile -*-

# M4M module for word alignment with fast_align
# see http://aclweb.org/anthology-new/N/N13/N13-1073 (paper)
# see https://github.com/clab/fast_align (github)
#
# Names used here but defined outside this file: WDIR, L1, L2, pllshards,
# m4mdir, symal, fast_align, the $(lock)/$(unlock) recipe fragments and the
# pll-ready target.
#
# Almost all prerequisites below are order-only (after the '|'): they must
# exist before the target is built, but a newer prerequisite never forces a
# rebuild of an existing target.
#
# Recipes use bash-only features (process substitution, pipefail), so SHELL
# must be bash -- presumably set by the including makefile; TODO confirm.

# Output directory for the alignment (overridable) and the path prefixes of
# the cleaned parallel corpus shards that serve as input.
fstaln   ?= ${WDIR}/crp/trn/aln/fast
fstaln.in = $(addprefix ${WDIR}/crp/trn/pll/clean/, ${pllshards})

# Arguments passed to symal for symmetrization.
symal_grow_diag_final_and = -a=g -d=yes -f=yes -b=yes
symal_args = ${symal_grow_diag_final_and}

# Top-level entry point. It is a command, not a file, so it is declared phony
# to keep working even if a file named 'fastalign' happens to exist.
# The target-specific 'options' value is inherited by all prerequisites and
# thus reaches the two fast_align invocations below.
.PHONY: fastalign
fastalign: | pll-ready
fastalign: options = -d -v -o
fastalign: $(fstaln)/${L1}.txt.gz
fastalign: $(fstaln)/${L2}.txt.gz
fastalign: $(fstaln)/${L1}-${L2}.symal.gz

# Symmetrized alignment: merge both corpus sides with the forward and backward
# alignments via fast-align2bal.py and pipe the result through symal.
# pipefail makes a failure of any pipeline stage fail the recipe, so a
# truncated file is never moved into place. The output is written to $@_
# first and renamed only on success. The scratch directory is removed
# afterwards.
.INTERMEDIATE: $(fstaln)/tmp/${L1}-${L2}.fwd.gz
.INTERMEDIATE: $(fstaln)/tmp/${L1}-${L2}.bwd.gz
$(fstaln)/${L1}-${L2}.symal.gz: | $(fstaln)/tmp/${L1}-${L2}.fwd.gz
$(fstaln)/${L1}-${L2}.symal.gz: | $(fstaln)/tmp/${L1}-${L2}.bwd.gz
$(fstaln)/${L1}-${L2}.symal.gz: | $(fstaln)/${L1}.txt.gz
$(fstaln)/${L1}-${L2}.symal.gz: | $(fstaln)/${L2}.txt.gz
	$(lock)
	set -o pipefail; ${m4mdir}/scripts/fast-align2bal.py \
	<(zcat $(fstaln)/${L1}.txt.gz) \
	<(zcat $(fstaln)/${L2}.txt.gz) \
	<(zcat $(fstaln)/tmp/${L1}-${L2}.fwd.gz) \
	<(zcat $(fstaln)/tmp/${L1}-${L2}.bwd.gz) \
	| ${symal} ${symal_args} | gzip > $@_ && mv $@_ $@
	rm -rf $(fstaln)/tmp
	$(unlock)

# fast_align input: both corpus sides pasted line by line, with the first tab
# on each line replaced by ' ||| '.
.INTERMEDIATE: $(fstaln)/tmp/${L1}-${L2}.txt
$(fstaln)/tmp/${L1}-${L2}.txt: | $(fstaln)/${L1}.txt.gz
$(fstaln)/tmp/${L1}-${L2}.txt: | $(fstaln)/${L2}.txt.gz
	$(lock)
	set -o pipefail; \
	paste -d '\t' <(zcat $(fstaln)/${L1}.txt.gz) <(zcat $(fstaln)/${L2}.txt.gz) \
	| perl -pe 's/\t/ \|\|\| /' > $@_ && mv $@_ $@
	$(unlock)

# Forward alignment. 'options' defaults to empty when this target is built on
# its own rather than via 'fastalign'.
$(fstaln)/tmp/${L1}-${L2}.fwd.gz: options ?=
$(fstaln)/tmp/${L1}-${L2}.fwd.gz: | $(fast_align)
$(fstaln)/tmp/${L1}-${L2}.fwd.gz: | $(fstaln)/tmp/${L1}-${L2}.txt
$(fstaln)/tmp/${L1}-${L2}.fwd.gz:
	$(lock)
	set -o pipefail; \
	${fast_align} -i ${@D}/${L1}-${L2}.txt ${options} | gzip > $@_ && mv $@_ $@
	$(unlock)

# Backward alignment: same input and options, with fast_align's -r flag added.
$(fstaln)/tmp/${L1}-${L2}.bwd.gz: options ?=
$(fstaln)/tmp/${L1}-${L2}.bwd.gz: | $(fast_align)
$(fstaln)/tmp/${L1}-${L2}.bwd.gz: | $(fstaln)/tmp/${L1}-${L2}.txt
$(fstaln)/tmp/${L1}-${L2}.bwd.gz:
	$(lock)
	set -o pipefail; \
	${fast_align} -r -i ${@D}/${L1}-${L2}.txt ${options} | gzip > $@_ && mv $@_ $@
	$(unlock)

# L2 side of the training corpus as a single gzipped file. With exactly one
# shard the file is hard-linked (falling back to a copy); otherwise the
# gzipped shards are concatenated. The ifeq is evaluated when the makefile is
# parsed and selects which recipe line ends up in the rule.
$(fstaln)/${L2}.txt.gz: | $(addsuffix .${L2}.gz, ${fstaln.in})
	$(lock)
ifeq ($(words ${pllshards}),1)
	@cp -l $| $@ || cp $| $@
else
	@cat $| > $@_ && mv $@_ $@
endif
	$(unlock)

# L1 side of the training corpus; same procedure as for L2 above.
$(fstaln)/${L1}.txt.gz: | $(addsuffix .${L1}.gz, ${fstaln.in})
	$(lock)
ifeq ($(words ${pllshards}),1)
	@cp -l $| $@ || cp $| $@
else
	@cat $| > $@_ && mv $@_ $@
endif
	$(unlock)

# install fast-align if you don't have it
# The checkout is built with CMake when it ships a CMakeLists.txt and with a
# plain make otherwise; either way the binary ends up at fast_align/fast_align
# before being copied to its destination. $(MAKE) is used instead of a literal
# 'make' so that the sub-make shares the parent's jobserver.
fast-align.git = https://github.com/clab/fast_align.git
${fast_align}:
	$(lock)
	git clone ${fast-align.git}
	if [ -f fast_align/CMakeLists.txt ]; then \
	  mkdir -p fast_align/build \
	  && (cd fast_align/build && cmake .. && $(MAKE)) \
	  && cp fast_align/build/fast_align fast_align/fast_align; \
	else \
	  cd fast_align && $(MAKE); \
	fi
	mkdir -p $(dir ${fast_align})
	cp fast_align/fast_align $(dir ${fast_align})
	rm -rf fast_align
	$(unlock)