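# -*- makefile -*-
#
# Word alignment of the ${L1}/${L2} training corpus with MGIZA.
#
# The rules in this file build, in order: word classes (mkcls), vocabulary and
# sentence files (plain2snt), co-occurrence files (snt2cooc), one MGIZA run per
# direction, the merged A3 alignment files, and the symmetrised alignment
# (giza2bal.pl + symal).  The tool variables and $(lock)/$(unlock) are used but
# not defined in this section.
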
# Options passed to ${mkcls} by mkcls_cmd.
# NOTE(review): presumably -n = number of optimisation runs and -c = number of
# word classes; confirm against the mkcls usage text.
mkcls_args = -n10 -c50

# Default MGIZA settings.  They are pattern-specific variables, i.e. they are
# only in effect while a target matching %/mgiza.cfg (or one of its
# prerequisites) is being built.  The mgiza.cfg recipe writes each of them into
# the configuration file; m1/m2/mh/m3/m4 are written twice, once under their own
# name and once as t1/t2/th/t3/t4.
# NOTE(review): presumably m1..m4 and mh are iteration counts for the IBM models
# and the HMM model; confirm against the MGIZA documentation.
%/mgiza.cfg: m1                 = 5
%/mgiza.cfg: m2                 = 0
%/mgiza.cfg: mh                 = 5
%/mgiza.cfg: m3                 = 3
%/mgiza.cfg: m4                 = 3
%/mgiza.cfg: nodumps            = 0
%/mgiza.cfg: onlyaldumps        = 0
%/mgiza.cfg: model4smoothfactor = 0.4
%/mgiza.cfg: nsmooth            = 4
%/mgiza.cfg: NCPUS              = 8

# Options for $(symal).  symal_args is what the symal.gz recipe actually uses;
# it defaults to the named preset below and can be pointed at another one.
symal_grow_diag_final_and = -a=g -d=yes -f=yes -b=yes
symal_args                = $(symal_grow_diag_final_and)

# Directory layout, all relative to $(WDIR):
#   gizaln    - base directory of the alignment step
#   giztmp    - intermediate files (vocabularies, .snt, .cooc, MGIZA runs)
#   gizout    - final products (A3 files, .txt.gz, .symal.gz); same as gizaln
#   gizaln.in - directory searched for the training text by the word-class rule
# Recursive (=) assignment is kept on purpose so that WDIR may be set later.
gizaln    = $(WDIR)/crp/trn/aln/giza
giztmp    = $(gizaln)/tmp
gizout    = $(gizaln)
gizaln.in = $(WDIR)/crp/trn/pll/clean/

# `giza` is the user-facing goal of this file.  It needs the per-language text
# files and the symmetrised alignment.  All three are order-only prerequisites:
# they are built when missing but never rebuilt because of time stamps.
# giza-prep is declared phony here; no rule for it is visible in this section.
.PHONY: giza giza-prep

giza: | $(gizout)/$(L1).txt.gz \
        $(gizout)/$(L2).txt.gz \
        $(gizout)/$(L1)-$(L2).symal.gz
	@echo "GIZA WORD ALIGNMENT COMPLETE!"

# Helpers for naming the two alignment directions.
#   $(call other,LANG) - the member of the pair (L1,L2) that is not LANG
#   $(call fwd,LANG)   - "LANG-OTHER"
#   $(call bwd,LANG)   - "OTHER-LANG"
# `other` compares whole words with $(filter).  A substring test
# ($(findstring)) gives the wrong answer when one language code is contained
# in the other, e.g. L1=en-us, L2=en.
other = $(if $(filter $(1),${L1}),${L2},${L1})
fwd   = $(1)-$(call other,$(1))
bwd   = $(call other,$(1))-$(1)

# $(gizout)/<lang>.txt.gz: the second line of every three-line group of an A3
# alignment file, gzip-compressed.  The ${L1} text is taken from the ${L2}-${L1}
# A3 file and the ${L2} text from the ${L1}-${L2} one.
#
# Both targets share one recipe; only the input file (a3file) differs.  The
# recipe runs under bash with pipefail so that a failing zcat or perl aborts
# the rule instead of letting gzip install a truncated file.  Prerequisites
# are order-only, so a bad file would never be rebuilt.
$(gizout)/${L1}.txt.gz $(gizout)/${L2}.txt.gz: SHELL = bash

$(gizout)/${L1}.txt.gz: a3file = $(gizout)/${L2}-${L1}.A3.final.gz
$(gizout)/${L1}.txt.gz: | $(gizout)/${L2}-${L1}.A3.final.gz

$(gizout)/${L2}.txt.gz: a3file = $(gizout)/${L1}-${L2}.A3.final.gz
$(gizout)/${L2}.txt.gz: | $(gizout)/${L1}-${L2}.A3.final.gz

$(gizout)/${L1}.txt.gz $(gizout)/${L2}.txt.gz:
	$(lock)
	set -o pipefail; \
	zcat $(a3file) | perl -ne 'print if ++$$ctr%3 == 2;' | gzip > $@_ && mv $@_ $@
	$(unlock)

# $(gizout)/${L1}-${L2}.symal.gz: symmetrised word alignment.
#
# giza2bal.pl reads the ${L1}-${L2} A3 file through -d and the ${L2}-${L1} one
# through -i; its output is piped through symal, perl removes everything up to
# and including "{##}" plus the following white space, and gzip compresses the
# result.  The output is written to $@_ and renamed only on success.
#
# bash with pipefail is required here: otherwise only gzip's exit status is
# checked and a failing giza2bal.pl or symal leaves an empty archive that is
# never rebuilt (all prerequisites are order-only).
$(gizout)/${L1}-${L2}.symal.gz: SHELL = bash
$(gizout)/${L1}-${L2}.symal.gz: A3fwd = $(gizout)/${L1}-${L2}.A3.final.gz
$(gizout)/${L1}-${L2}.symal.gz: A3bwd = $(gizout)/${L2}-${L1}.A3.final.gz
$(gizout)/${L1}-${L2}.symal.gz: | $(gizout)/${L1}-${L2}.A3.final.gz \
                                  $(gizout)/${L2}-${L1}.A3.final.gz \
                                  $(giza2bal.pl)
	$(lock)
	set -o pipefail; \
	$(giza2bal.pl) -d 'gunzip -c ${A3fwd}' -i 'gunzip -c ${A3bwd}' \
	| $(symal) $(symal_args) | perl -pe 's/^.*{##}\s+//' | gzip > $@_ && mv $@_ $@
	$(unlock)

# $(gizout)/<dir>.A3.final.gz: the A3 part files written by one MGIZA run,
# merged by $(mgiza.merge) and gzip-compressed.  <dir> is the stem, e.g.
# ${L1}-${L2}; the parts are expected in $(giztmp)/<dir>/.
#
# The part files are expanded by the shell when the recipe runs.  The rule
# fails loudly when there are none, or when the merge tool fails (pipefail),
# rather than installing an empty archive.
$(gizout)/%.A3.final.gz: SHELL = bash
$(gizout)/%.A3.final.gz: | $(giztmp)/%/mgiza.DONE
	mkdir -p ${@D}
	$(lock)
	set -o pipefail; \
	set -- $(giztmp)/$*/$*.A3.final.part*; \
	if [ ! -e "$$1" ]; then \
	  echo "$@: no $*.A3.final.part* files in $(giztmp)/$*" >&2; exit 1; \
	fi; \
	$(mgiza.merge) "$$@" | gzip > $@_
	mv $@_ $@
	$(unlock)

# <dir>/mgiza.DONE is a stamp file: it is touched only after $(mgiza) has run
# successfully on the order-only prerequisites ($|), i.e. on <dir>/mgiza.cfg
# unless further order-only prerequisites are added elsewhere.
%/mgiza.DONE: | %/mgiza.cfg
	$(lock)
	$(mgiza) $| && touch $@
	$(unlock)

# Create the working and output directories on demand.  Rules that need them
# should list them as order-only prerequisites.
$(giztmp) $(gizout):
	mkdir -p $@

# $(call stream,DIR,EXT)
#   Shell pipeline that writes the decompressed contents of all regular files
#   below DIR (symlinks followed) named *.EXT or *.EXT.gz to stdout, in sorted
#   path order.  The two -name tests are grouped so that `-type f` applies to
#   both; without the parentheses it only restricts the first one.
#
# $(call mkcls_cmd,DIR,EXT,OUT)
#   Feeds that stream to ${mkcls} on /dev/stdin (options: $(mkcls_args)) and
#   lets it write its result to OUT (-V).
stream    = find -L $(1) -type f \( -name "*.$(2)" -o -name "*.$(2).gz" \) | sort | xargs zcat -f
mkcls_cmd = $(call stream,$(1),$(2)) | ${mkcls} $(mkcls_args) -p/dev/stdin -V$(3) opt

# $(giztmp)/<lang>.vcb.classes (and <lang>.vcb.classes.cats): word classes
# computed by mkcls from every *.<lang> / *.<lang>.gz file below $(gizaln.in).
#
# ${mkcls} is listed in the rule that carries the recipe.  Pattern rules do
# not accumulate prerequisites the way explicit rules do, so a separate
# recipe-less "$(giztmp)/%.vcb.classes: ${mkcls}" line has no effect.  It is
# order-only, like every other prerequisite in this file: the tool must exist
# (or be buildable), but a newer binary does not trigger recomputation.
# Because $| now also contains ${mkcls}, the input directory is passed to
# mkcls_cmd by name.
# NOTE(review): this assumes ${mkcls} is a file path, not a bare command name
# resolved through PATH; confirm where mkcls is defined.
$(giztmp)/%.vcb.classes: | ${mkcls} $(gizaln.in)
	@echo CREATING $@
	$(lock)
	mkdir -p $(@D)
	@$(call mkcls_cmd,$(gizaln.in),$*,$@_) && mv $@_ $@
	@mv $@_.cats $@.cats
	$(unlock)

# One run of $(plain2snt) produces four files: both vocabularies and both
# sentence files.  ${L1}-${L2}.snt is the target that carries the recipe; the
# other three are side products and merely wait for it.
$(giztmp)/${L1}.vcb $(giztmp)/${L2}.vcb $(giztmp)/${L2}-${L1}.snt: | $(giztmp)/${L1}-${L2}.snt

# The recipe uses process substitution, <( ... ), which needs bash.  bash is
# requested here explicitly so that the target also builds when it is not
# reached through a mgiza.cfg target (which would pass its SHELL down).
$(giztmp)/${L1}-${L2}.snt: SHELL = bash
# Input files: $(pll-clean) holds path prefixes; the language suffix and .gz
# are appended.  Target-specific variables are not available in prerequisite
# lists, hence the repeated $(addsuffix ...) below.
$(giztmp)/${L1}-${L2}.snt: L1files = $(addsuffix .${L1}.gz, $(pll-clean))
$(giztmp)/${L1}-${L2}.snt: L2files = $(addsuffix .${L2}.gz, $(pll-clean))
$(giztmp)/${L1}-${L2}.snt: | $(giztmp)
$(giztmp)/${L1}-${L2}.snt: | $(addsuffix .${L1}.gz, $(pll-clean))
$(giztmp)/${L1}-${L2}.snt: | $(addsuffix .${L2}.gz, $(pll-clean))
	$(lock)
	$(plain2snt) \
	<(ls $(L1files) | xargs zcat -f) \
	<(ls $(L2files) | xargs zcat -f) \
	-vcb1 $(giztmp)/${L1}.vcb_ -vcb2 $(giztmp)/${L2}.vcb_ \
	-snt1 $(giztmp)/${L1}-${L2}.snt_ -snt2 $(giztmp)/${L2}-${L1}.snt_
	mv $(giztmp)/${L1}.vcb_ $(giztmp)/${L1}.vcb
	mv $(giztmp)/${L2}.vcb_ $(giztmp)/${L2}.vcb
	mv $(giztmp)/${L1}-${L2}.snt_ $(giztmp)/${L1}-${L2}.snt
	mv $(giztmp)/${L2}-${L1}.snt_ $(giztmp)/${L2}-${L1}.snt
	$(unlock)

# $(giztmp)/<from>-<to>.cooc: co-occurrence file for one alignment direction.
# $(snt2cooc) is called with the temporary output name, the two vocabularies
# (V1 = <from>, V2 = <to>) and the order-only prerequisites ($|, the matching
# .snt file); the output is renamed only if the tool succeeds.
$(giztmp)/$(L1)-$(L2).cooc: V1 = $(giztmp)/$(L1).vcb
$(giztmp)/$(L1)-$(L2).cooc: V2 = $(giztmp)/$(L2).vcb

$(giztmp)/$(L2)-$(L1).cooc: V1 = $(giztmp)/$(L2).vcb
$(giztmp)/$(L2)-$(L1).cooc: V2 = $(giztmp)/$(L1).vcb

$(giztmp)/%.cooc: | $(giztmp)/%.snt
	@echo CREATING $@
	$(lock)
	$(snt2cooc) $@_ $(V1) $(V2) $| && mv $@_ $@
	$(unlock)

# $(giztmp)/<from>-<to>/mgiza.cfg: configuration file for one MGIZA run.
#
# bash is selected for these targets.  Target-specific variables are inherited
# by prerequisites, so the upstream recipes run under bash as well when they
# are built on behalf of a mgiza.cfg target.
$(giztmp)/%/mgiza.cfg: SHELL=bash
# --- CORPUS RESOURCES ---------------------------------------------------------
# Derived from FROM/TO, which are set per direction further down.
$(giztmp)/%/mgiza.cfg: V1   = $(giztmp)/${FROM}.vcb
$(giztmp)/%/mgiza.cfg: V2   = $(giztmp)/${TO}.vcb
$(giztmp)/%/mgiza.cfg: SNT  = $(giztmp)/${FROM}-${TO}.snt
$(giztmp)/%/mgiza.cfg: COOC = $(giztmp)/${FROM}-${TO}.cooc
$(giztmp)/%/mgiza.cfg: ODIR = $(giztmp)/${FROM}-${TO}/${FROM}-${TO}

$(giztmp)/${L1}-${L2}/mgiza.cfg: FROM = ${L1}
$(giztmp)/${L1}-${L2}/mgiza.cfg: TO   = ${L2}
$(giztmp)/${L2}-${L1}/mgiza.cfg: FROM = ${L2}
$(giztmp)/${L2}-${L1}/mgiza.cfg: TO   = ${L1}
$(giztmp)/${L1}-${L2}/mgiza.cfg: | $(giztmp)/${L1}-${L2}.cooc
$(giztmp)/${L1}-${L2}/mgiza.cfg: | $(giztmp)/${L1}-${L2}.snt
$(giztmp)/${L2}-${L1}/mgiza.cfg: | $(giztmp)/${L2}-${L1}.cooc
$(giztmp)/${L2}-${L1}/mgiza.cfg: | $(giztmp)/${L2}-${L1}.snt
# The file is assembled under a temporary name and moved into place at the
# end.  Appending to $@ directly would duplicate every key when the rule is
# re-run on an existing file (e.g. make -B), and an interrupted run would
# leave a partial file that counts as up to date.
$(giztmp)/${L1}-${L2}/mgiza.cfg \
$(giztmp)/${L2}-${L1}/mgiza.cfg: | \
	$(giztmp)/${L1}.vcb $(giztmp)/${L1}.vcb.classes \
	$(giztmp)/${L2}.vcb $(giztmp)/${L2}.vcb.classes
	mkdir -p $(dir $@)
	@{ echo "s ${V1}"; \
	   echo "t ${V2}"; \
	   echo "c ${SNT}"; \
	   echo "cooc ${COOC}"; \
	   echo "m1 ${m1}"; \
	   echo "m2 ${m2}"; \
	   echo "mh ${mh}"; \
	   echo "m3 ${m3}"; \
	   echo "m4 ${m4}"; \
	   echo "t1 ${m1}"; \
	   echo "t2 ${m2}"; \
	   echo "th ${mh}"; \
	   echo "t3 ${m3}"; \
	   echo "t4 ${m4}"; \
	   echo "o ${ODIR}"; \
	   echo "model4smoothfactor ${model4smoothfactor}"; \
	   echo "onlyaldumps ${onlyaldumps}"; \
	   echo "nodumps ${nodumps}"; \
	   echo "nsmooth ${nsmooth}"; \
	   echo "NCPUS ${NCPUS}"; \
	 } > $@_
	mv $@_ $@

# Parse-time sanity checks: warn (do not abort) when a directory variable
# expands to nothing.
# NOTE(review): as assigned in this file both values always contain a literal
# path suffix, so these warnings can only fire when the variables are
# overridden with an empty value, e.g. on the command line.
ifeq "$(gizaln)" ""
$(warning Giza base directory not defined)
endif

ifeq "$(gizaln.in)" ""
$(warning No directory for Giza++ training data specified!)
endif