diff --git a/mosesdecoder/contrib/m4m/examples/giza-vs-fast.m4m b/mosesdecoder/contrib/m4m/examples/giza-vs-fast.m4m new file mode 100644 index 0000000000000000000000000000000000000000..e5e56dc2ab8c9fa602c9c8678cd47401275534d8 --- /dev/null +++ b/mosesdecoder/contrib/m4m/examples/giza-vs-fast.m4m @@ -0,0 +1,99 @@ +# -*- Makefile -*- + +# some variables need to be set before m4m modules are included +.SECONDARY: + +MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder +MGIZA_ROOT = ${HOME}/tools/mgiza +fast_align = ${HOME}/bin/fast_align + +# L1: source language; L2: target language +L1 = de +L2 = en +WDIR = $(CURDIR) + +include ${MOSES_ROOT}/contrib/m4m/modules/m4m.m4m + +# both systems use the same language model +L2raw := $(wildcard ${WDIR}/crp/trn/*/raw/*.${L2}.gz) +L2data := $(subst /raw/,/cased/,${L2raw}) +lm.order = 5 +lm.factor = 0 +lm.lazy = 1 +lm.file = ${WDIR}/lm/${L2}.5-grams.kenlm +${lm.file}: | $(L2data) +$(eval $(call add_kenlm,${lm.file},${lm.order},${lm.factor},${lm.lazy})) +.INTERMEDIATE: ${L2data} + +dmodels = wbe-mslr-bidirectional-fe-allff +mysystem = systems/${word-alignment}-aligned +myptable = model/tm/${aligner}.${L1}-${L2} +mydtable = model/dm/${aligner}.${L1}-${L2} + +wa ?= $(error wa not specified on command line) +SYSTEMS := +aligner := +$(foreach a,${wa},\ +$(eval aligner:=${a});\ +$(eval $(clear-ptables));\ +$(eval $(clear-dtables));\ +$(eval SYSTEMS+=systems/${a}-aligned);\ +$(eval $(call add_binary_phrase_table,0,0,4,$${myptable}));\ +$(eval $(call add_binary_reordering_table,0,0,8,\ + ${dmodels},$${mydtable},$${myptable}));\ +$(eval $(call create_moses_ini,$${mysystem}))) + +aln: $(foreach a,${wa},${WDIR}/crp/trn/aln/$a/${L1}-${L2}.symal.gz) +info: +dtable: ${DTABLES} +ptable: ${PTABLES} +system: $(addsuffix /moses.ini.0,${SYSTEMS}) +eval: ${EVALUATIONS} + + +ifdef tune.runs + +TUNED_SYSTEMS := +EVALUATIONS := +$(eval $(tune_all_systems)) +$(eval $(bleu_score_all_systems)) +tune: ${TUNED_SYSTEMS} + echo TUNED ${TUNED_SYSTEMS} +all: 
${EVALUATIONS} + +else + +tune: all + +# The recursive calls below make sure that tuning runs happen sequentially +# (moses runs multi-threaded anyway). The reason is that we may want to have +# first results as soon as possible. +tune.runs := 1 1 +$(info TUNE RUNS ${tune.runs}) +all: + $(foreach n,$(shell seq ${tune.runs}),\ + ${MAKE} -f $(word 1, ${MAKEFILE_LIST}) \ + tune.runs="$n $n" ${MAKECMDGOALS} -${MAKEFLAGS}) + +endif + +.PHONY: $(addprefix reset-,lm tm dm all aln tune eval systems) +reset-aln: reset-mm + -rm -rf $(foreach a,${wa},crp/trn/aln/${a}) +reset-mm: reset-dm reset-tm + -rm -rf $(foreach a,${wa},crp/trn/mm/${a}) +reset-dm: reset-systems + -rm -rf $(foreach a,${wa},model/dm/${a}.*) +reset-tm: reset-systems + -rm -rf $(foreach a,${wa},model/tm/${a}.*) +reset-systems: + -rm -rf ${SYSTEMS} +reset-tune: + -rm -rf $(foreach s,${SYSTEMS}/$s/tune) +reset-eval: + -rm -rf $(foreach s,${SYSTEMS},$s/eval) +reset-lm: + -rm -rf lm +reset-all: reset-lm reset-aln + -rm -rf $(wildcard crp/trn/*/[ct]* crp/dev/[ct]* crp/tst/[ct]*) + -rm -rf auxiliary diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/Makefile b/mosesdecoder/contrib/m4m/modules/obsolete/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f785a0354133f23ebc3b4dd36a8f8f6cdaa4e4ab --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/Makefile @@ -0,0 +1,64 @@ +# -*- Makefile -*- +# Mandatory at the beginning of the file, before m4m inclusions + +# L1,L2: tags that identify translation source (L1) +# and translation target (L2) language +L1 ?= de +L2 ?= en + +LL = $(word 1, $(sort ${L1} ${L2}))-$(word 2, $(sort ${L1} ${L2})) +# a name for this experiment +experiment = dynsa-vs-std-phrase-table + +# the working directry +WDIR = $(CURDIR) +MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder + +# include m4m boilerplate +include ${MOSES_ROOT}/contrib/m4m/modules/m4m.m4m + +$(info M4MDIR=${m4mdir}) + +#include ${m4mdir}/baseline-system.make +#include 
${m4mdir}dynsa-system.make +#$(info ${MY_EXPERIMENT}) + +tune.sets = $(subst /raw/,/cased/,$(wildcard crp/dev/raw/*.${L1}.gz)) + +all: +.PHONY: all + +ifdef tune.runs +$(foreach tuneset, $(word 1,${tune.sets:.${L1}.gz=}),\ +$(foreach run,$(shell seq ${tune.runs}),\ + $(eval $(call tune_system,baseline/moses.ini.0,\ + baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + ${tuneset}.${L1},${tuneset}.${L2},0));\ + $(if ,$(info $(call tune_system,baseline/moses.ini.0,\ + baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + ${tuneset}.${L1},${tuneset}.${L2},0));)\ + $(eval $(call copy_weights,dynsa/moses.ini.0,\ + baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + dynsa/tuned/$(notdir ${tuneset})/${run}/moses.ini));\ + $(if ,$(info $(call copy_weights,dynsa/moses.ini.0,\ + baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + dynsa/tuned/$(notdir ${tuneset})/${run}/moses.ini));)\ + $(foreach evalset,$(word 2,${tune.sets:.${L1}.gz=}),\ + $(foreach system,baseline dynsa,\ + $(eval evaltarget:=${system}/eval/$(notdir ${tuneset})/${run}/$(notdir ${evalset}));\ + $(eval $(call bleu_eval,${evaltarget},\ + ${system}/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + ${evalset}.${L1},${moses.inputtype.plaintext},${evalset}.${L2}));\ + $(if ,$(info $(call bleu_eval,${evaltarget},\ + ${system}/tuned/$(notdir ${tuneset})/${run}/moses.ini,\ + ${evalset}.${L1},${moses.inputtype.plaintext},${evalset}.${L2}));)\ + ));\ +)) + +all: ${EVALUATIONS} + echo EVALS ${EVALUATIONS} +else +all: + $(foreach n,$(shell seq 1 1),${MAKE} tune.runs="$n $n";) +endif + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/baseline-system.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/baseline-system.m4m new file mode 100644 index 0000000000000000000000000000000000000000..4125c8e718b6142a4e8b364ce579b0cd9e48fc00 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/baseline-system.m4m @@ -0,0 +1,48 @@ +# -*- Makefile -*- + +# This module defines a simple phrase-based baseline 
system +# - a single corpus +# - no factors +# - single ttable +# - single distortion model + +# chose a name for the system +# ${system}/moses.ini.0 then defines the system +system = baseline +SYSTEMS += ${system} +.PHONY: ${system} +${system}: ${system}/moses.ini.0 + +################################################################################# +# +# Create phrase table(s) and distortion model(s) that you want to use in this +# system. If you already have binary or text version of all tables, you don't +# need to specify pll.{txt1,txt2,aln}. +pll.txt1 = ${WDIR}/crp/trn/aln/fast/${L1}.txt.gz +pll.txt2 = ${WDIR}/crp/trn/aln/fast/${L2}.txt.gz +pll.aln = ${WDIR}/crp/trn/aln/fast/${L1}-${L2}.symal.gz +ptable = ${WDIR}/model/tm/ptable.${L1}-${L2} +dtable = ${WDIR}/model/dm/dtable.${L1}-${L2} +ptable.max-phrase-length = 7 +# ptable.smoothing = --GoodTuring +# dmodels = wbe-mslr-bidirectional-fe-allff + +LMODEL_ENTRIES = KENLM;name=KENLM0;order=5;factor=0;num-features=1;lazyken=0;path=$(abspath lm/europarl-v7.en.kenlm) +LMODELS = lm/europarl-v7.en.kenlm + +MY_EXPERIMENT += $(call add_binary_phrase_table,0,0,5,${ptable}) +$(eval $(call add_binary_phrase_table,0,0,5,${ptable})) + +if 0 +MY_EXPERIMENT += $(call add_binary_reordering_table,0,0,8,\ + wbe-mslr-bidirectional-fe-allff,${dtable},${ptable}) +$(eval $(call add_binary_reordering_table,0,0,8,\ + wbe-mslr-bidirectional-fe-allff,${dtable},${ptable})) +endif + +MY_EXPERIMENT += $(call create_moses_ini,${system}) +$(eval $(call create_moses_ini,${system})) + +################################################################################# + + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/directory-structure.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/directory-structure.m4m new file mode 100644 index 0000000000000000000000000000000000000000..6020f9141f2d6e85928199051f8f99cf9ebab84f --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/directory-structure.m4m @@ -0,0 +1,7 @@ +# -*- Makefile -*- + 
+# STANDARD LOCATIONS +basedir ?= $(CURDIR) +tune.dir ?= ${basedir}/tune +eval.dir ?= ${basedir}/eval +input.dir ?= ${basedir}/input diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/model-filtering.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/model-filtering.m4m new file mode 100644 index 0000000000000000000000000000000000000000..7b1933e6c5a54a24e8b62876b3ae216cd3c5f7b5 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/model-filtering.m4m @@ -0,0 +1,37 @@ +# -*- Makefile -*- +# +# This module deals with model filtering (if necessary). +# It produces the moses.ini files for filtered models for +# tuning and evaluation. + +ifndef ${moses_ini_for_tuning} + moses_ini_for_tuning = # WHAT'S THE DEFAULT LOCATION FOR THIS IN EMS? +endif + +ifndef ${moses_ini_for_eval} + moses_ini_for_eval = # WHAT'S THE DEFAULT LOCATION FOR THIS IN EMS? +endif + +# filter models if suggested by set-up +ifneq (${moses_ini_for_tuning}, ${untuned_moses_ini}) + ${moses_ini_for_tuning}: | ${untuned_moses_ini} + ${moses_ini_for_tuning}: | ${tuning_input_ready} + + # phrase table in text format? + ifeq ($(shell grep -v '^ *\#' ${untuned_moses_ini} \ + | grep -A1 '\[ttable-file\]' | tail -n +2 \ + | head -n1 | awk '{print $$1}'),0) + # ADD PHRASE TABLE FILTERING COMMAND HERE + endif + + # how does moses know if a lexicalized distortion table is binary or not? 
+ # ADD LEXICAL DISTORTION TABLE FILTERING COMMAND HERE + +ifneq (${moses_ini_for_eval),$(tuned_moses_ini)) + # add code for model filtering for eval here +endif + + + + + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/phrase-table.make.scratch b/mosesdecoder/contrib/m4m/modules/obsolete/phrase-table.make.scratch new file mode 100644 index 0000000000000000000000000000000000000000..81b869750b003a39feb9832d36cb88c1fa1f74a0 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/phrase-table.make.scratch @@ -0,0 +1,124 @@ +# .PHONY: $1 +# $1: $1.binphr.idx +# $1.txt.gz: | L1text = $4 +# $1.txt.gz: | L2text = $5 +# $1.txt.gz: | symal = $6 +# ${moses.ini}: $1 +# PTABLES += 1;$2;$3;5;$1 +# endef + + +# ${target}.tmp/fwd/scored.gz: | ${target}/phrase-extraction.DONE +# | ${L1File} ${L2File} ${symal} + + +# # convert phrase table from text file to binary format +# %.binphr.idx: | %.txt.gz ${MOSES_BIN}/processPhraseTable +# $(lock) +# zcat -f $*.txt.gz | ${MOSES_BIN}/processPhraseTable \ +# -ttable ${L1factors} ${L2factors} - -nscores 5 -out ${@D}/_${@F} \ +# && mv ${@D}/_${@F} $@ +# $(unlock) + + +# # directory definitions +# mo_mdl = model +# mo_tmp = model/tmp +# wrdaln = ${fstaln}/out +# # wrdaln should be set elsewhere! 
+ +# # milestone files created during phrase table construction +# ptable_bin = ${mo_mdl}/ptable.${L1}-${L2} +# ptable = ${mo_mdl}/ptable.${L1}-${L2}.txt.gz +# lex1given2 = ${mo_mdl}/${L1}-given-${L2}.lex.gz +# lex2given1 = ${mo_mdl}/${L2}-given-${L1}.lex.gz +# mosesinifile = ${mo_mdl}/moses.ini.0 + +# .PHONY: lex ptable +# lex: ${lex1given2} ${lex2given1} +# ptable: ${ptable_bin} + +# # steps taken in this module + +# # ------------------------------------------------------------------------------- +# # --- STEP 1a: extract raw phrases from word-aligned corpus --------------------- +# # ------------------------------------------------------------------------------- +# # Note: the script ${moses.extract-phrases} takes care of initial sorting +# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract-phrases} +# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract} +# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}.txt.gz +# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L2}.txt.gz +# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}-${L2}.symal.gz +# ${mo_tmp}/phrase-extraction.DONE: +# $(lock) +# ${moses.extract-phrases} \ +# ${moses.extract} \ +# ${wrdaln}/${L1}.txt.gz \ +# ${wrdaln}/${L2}.txt.gz \ +# ${wrdaln}/${L1}-${L2}.symal.gz \ +# ${mo_tmp} ${max_phrase_length} \ +# ${dmodel.type}-${dmodel.orientation} \ +# && touch $@ +# $(unlock) + +# # ------------------------------------------------------------------------------- +# # --- STEP 1a: extract word translation lexica from word-aligned corpus --------- +# # --- (for lexical phrase scoring) --------- +# # ------------------------------------------------------------------------------- +# $(lex2given1): $(lex1given2) +# $(lex1given2): | ${wrdaln}/${L1}.txt.gz +# $(lex1given2): | ${wrdaln}/${L2}.txt.gz +# $(lex1given2): | ${wrdaln}/${L1}-${L2}.symal.gz +# $(lock) +# $(moses.make-lex) \ +# ${wrdaln}/${L1}.txt.gz \ +# ${wrdaln}/${L2}.txt.gz \ +# ${wrdaln}/${L1}-${L2}.symal.gz \ +# $(lex1given2) \ +# $(lex2given1) 
+# $(unlock) + +# # ------------------------------------------------------------------------------- +# # --- STEP 2: score extracted phrase pairs -------------------------------------- +# # ------------------------------------------------------------------------------- +# ptfwdhalf = ${mo_tmp}/fwd/phrases.fwd.scored.gz +# ptbwdhalf = ${mo_tmp}/bwd/phrase-scoring.DONE + +# # ------------------------------------------------------------------------------- +# # --- STEP 2a: score phrases in the 'forward' direction ------------------------- +# # ------------------------------------------------------------------------------- +# $(ptfwdhalf): | ${mo_tmp}/phrase-extraction.DONE +# $(ptfwdhalf): | ${lex1given2} +# $(lock) +# $(merge-sorted) ${mo_tmp}/fwd/part.*.gz \ +# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex1given2} ${@:.scored.gz=} \ +# $(ptable.smoothing) && mv $@_ $@ +# $(unlock) + +# # ------------------------------------------------------------------------------- +# # --- STEP 2b: score phrases in the 'backward' direction ------------------------- +# # ------------------------------------------------------------------------------- +# # Note: ${moses.score-phrases} re-sorts the scored backward phrases +# $(ptbwdhalf): | ${mo_tmp}/phrase-extraction.DONE +# $(ptbwdhalf): | ${lex2given1} +# $(lock) +# $(merge-sorted) ${mo_tmp}/bwd/part.*.gz \ +# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex2given1} ${@D}/scored \ +# "$(ptable.smoothing)" --Inverse && touch $@ +# $(unlock) + +# # ------------------------------------------------------------------------------- +# # --- STEP 3: put the two phrase table halves together -------------------------- +# # ------------------------------------------------------------------------------- +# # ptfwdhalf is a single .gz file, ptbwdhalf is a collection .gz files +# $(ptable): | ${MOSES_BIN}/consolidate +# $(ptable): | $(ptfwdhalf) $(ptbwdhalf) +# $(lock) +# ${MOSES_BIN}/consolidate \ +# <(zcat ${ptfwdhalf}) \ +# 
<(${merge-sorted} ${mo_tmp}/bwd/scored.*.gz) /dev/stdout \ +# $(if $(ptable.smoothing), \ +# $(ptable.smoothing) $(ptfwdhalf:.sorted.gz=.coc)) \ +# | gzip > $@_ && mv $@_ $@ +# $(unlock) + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/reporting.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/reporting.m4m new file mode 100644 index 0000000000000000000000000000000000000000..967e3817773faefc54c669d1ce08a201301ccc3a --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/reporting.m4m @@ -0,0 +1,95 @@ +# -*- Makefile -*- + +rset = set=$2,type=$3,file=evaluation/$1/$2.$3 +analyses = $(foreach e, ${eval-sets}, \ + $(call rset,$1,$e,analysis-precision) \ + $(call rset,$1,$e,analysis-coverage)) +eval-scores = $(foreach e, ${eval-sets}, \ + $(foreach m, ${eval-metrics}, \ + $(call rset,$1,$e,$m))) +eval-results = $(foreach e, ${eval-sets}, \ + $(foreach m, ${eval-metrics}, \ + evaluation/$1/$e.$m)) + + +.SECONDEXPANSION: +# NOTA BENE: setup-experiments.make adds additional dependencies for +# evaluation/%/report in the file experiments.make! 
+evaluation/%/report: sets = $(call eval-scores,$*) +#evaluation/%/report: sets += $(call analyses,$*) +#evaluation/%/report: tuned_moses_ini := $(if ${have_tuned_moses_ini},${have_tuned_moses_ini},tuning/$*/moses.tuned.ini) +evaluation/%/report: prereqs = $(call eval-results,$*) +evaluation/%/report: $$(prereqs) + echo $(foreach s, ${sets}, $s) $^ + mkdir $@.lock + echo $(call lockline) > $@.lock/owner + ${report} ${sets} > $@_ + mv $@_ $@ + rm $@.lock/owner + rmdir $@.lock + +%.analysis: params1 = -input ${$(notdir $*)-src} +%.analysis: params1 += -input-corpus ${crp_train}.${L1} +%.analysis: params1 += -ttable ${ttable} -dir $@ +%.analysis: params2 = -precision-by-coverage +%.analysis: params2 += -reference ${$(notdir $*)-ref} +%.analysis: params2 += -system $*.truecased +%.analysis: params2 += -segmentation $*.output +%.analysis: params2 += -system-alignment $*.output.wa +%.analysis: params2 += -coverage $@ +%.analysis: | ${ttable} ${crp_train}.${L1} +%.analysis: %.output.wa %.output %.truecased + @echo ANALYSING $^ + @mkdir $@.lock + @echo $(call lockline) > $@.lock/owner + ${analyze} ${params1} + ${analyze} ${params1} ${params2} + @rm$@.lock/owner + @rmdir $@.lock + +%.multi-bleu: %.cleaned + $(info ) + $(info RUNNING MULTI-BLEU on $^) + @mkdir $@.lock + @echo $(call lockline) > $@.lock/owner + ${multi-bleu} ${$(notdir $*)-ref} < $< > $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock + +%.truecased: %.cleaned + mkdir $@.lock + $(detruecase) < $< > $@_ + mv $@_ $@ + rmdir $@.lock + +%.cleaned: %.output + $(info ) + $(info CLEANING UP DECODER OUTPUT: $<) + $(info ) + mkdir $@.lock + echo $(call lockline) > $@.lock/owner + $(clean-decoder-output) < $< > $@_ + mv $@_ $@ + rm $@.lock/owner + rmdir $@.lock + +%.output.wa: %.output +evaluation/%.output: decoder_flags += -threads ${moses.threads} -v 0 +evaluation/%.output: decoder_flags += -inputtype ${input-type} +evaluation/%.output: decoder_flags += -alignment-output-file $@.wa +evaluation/%.output: 
decoder_flags += -t -text-type "test" +evaluation/%.output: decoder_flags += -f ${moses_ini} +evaluation/%.output: input = ${$(notdir $*)-src} +evaluation/%.output: + echo MOSES_INI = ${moses_ini} + @mkdir -p $(@D) + @mkdir $@.lock + @echo $(call lockline) > $@.lock/owner + ${decode} ${decoder_flags} < ${input} > $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock + +.SECONDARY: + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/run-moses.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/run-moses.m4m new file mode 100644 index 0000000000000000000000000000000000000000..c24c92a8e5b59518331861bc5df1f782b165da48 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/run-moses.m4m @@ -0,0 +1,37 @@ +# -*- Makefile -*- + +# This make module deals with running the moses decoder. +# It sets default parameters and checks that parameters that +# need to be set elsewhere are actually set. + +# The following parameters are translation-job specific and need to be set +# explicitly for each job. 
+ +moses.threads ?= 4 +moses.flags += -threads ${moses.threads} +moses.flags += -v 0 -t -text-type "test" + +%.moses-out.wa: moses.flags += -alignment-output-file $*.output.wa +%.moses-out.wa: %.moses-out + + +.SECONDEXPANSION: +%.moses-out: + echo MOSES $^ + $(checkvar,moses.input) + $(checkvar,moses.ini) + $(lock) + ${moses} -i ${moses.input} -inputtype ${moses.inputtype} \ + -f ${moses.ini} ${moses.flags} > $@_ && mv $@_ $@ + $(unlock) + +%.cleaned: %.moses-out + $(lock) + $(clean-decoder-output) < $< > $@_ && mv $@_ $@ + $(unlock) + +%.natcased: %.cleaned + $(eval $(call lock)) + $(detruecase) < $*.cleaned > $@_ && mv $@_ $@ + $(eval $(call unlock)) + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/setup-experiments.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/setup-experiments.m4m new file mode 100644 index 0000000000000000000000000000000000000000..dd3bf3dd9076a56cfa3844a37140864606b21544 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/setup-experiments.m4m @@ -0,0 +1,121 @@ +# -*- Makefile -*- + +# This make module sets up the actual experiments + +L1 = fr +L2 = en +tune-ref-ready = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/tuning/reference.tc.18 +eval-ref-ready = /fs/saxnot5/germann/accept/homophones/exp.new/evaluation/201201_devtest_b.reference.tok.1 +crp_train = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/training/corpus.19 +ttable = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/model/phrase-table.10 + +untuned_moses_ini = model/moses.ini.0 +fixed-iweight = --activate-feature d_0,d_1,d_2,d_3,d_4,d_5,d_6,lm_0,w_0,tm_0,tm_1,tm_2,tm_3,tm_4 + +# list the evaluation metrics to be used for evaluation +# TO DO: list available metrics +eval-metrics = multi-bleu +moses-threads = 20 +tuning-runs = $(shell seq 25) + +# experiments.make: WSCHEMES = uniform unigram bigram bigram2 +# experiments.make: DATASETS = tune eval +# experiments.make: PREPROC = baseline uniq multi +# experiments.make: CSETS = unfiltered 
filtered edited +experiments.make: WSCHEMES = bigram2 +experiments.make: DATASETS = tune eval +experiments.make: PREPROC = baseline +experiments.make: CSETS = filtered +# remake experiments.make if this file changes +experiments.make: $(word $(words ${MAKEFILE_LIST}), ${MAKEFILE_LIST}) +experiments.make: + mkdir $@.lock + echo $(call lockline) > $@.lock/owner + echo '# -*- Makefile -*-' > $@_ + echo '# This file was automatically generated by setup-experiments.make.' >> $@_ + echo 'experiments := ' >> $@_; + $(foreach p, ${PREPROC}, \ + echo '# NEW EXPERIMENT #####################################' >> $@_; \ + echo 'experiments += $p' >> $@_; \ + echo 'ctr = $$(words $${experiments})' >> $@_; \ + echo '$p: input-type = 0' >> $@_; \ + echo '$p: eval-sets = $p.eval' >> $@_; \ + echo '$p: tune-src = input/$p.tune.tc' >> $@_; \ + echo '$p: tune-ref = ${tune-ref-ready}' >> $@_; \ + echo '$p: $p.eval-src = input/$p.eval.tc' >> $@_; \ + echo '$p: $p.eval-ref = ${eval-ref-ready}' >> $@_; \ + echo '$p: evaluation/$${ctr}/report' >> $@_; \ + echo >> $@_; \ + echo 'evaluation/$p/%/$p.eval.output: input = input/$p.eval.tc' >> $@_; \ + echo 'evaluation/$p/%/$p.eval.output: input/$p.eval.tc' >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/$p.eval.output: ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/$p.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/$p.eval.output: moses_ini := ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/$p.eval.output: moses_ini := tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo 'evaluation/$${ctr}/$p.eval.multi-bleu: $${$p.eval-ref}' >> $@_; \ + echo >> $@_;) + $(foreach c, ${CSETS}, \ + $(foreach p, ${PREPROC}, \ + $(foreach w, ${WSCHEMES}, \ + echo '# NEW EXPERIMENT #####################################' >> $@_; \ + echo 'experiments += $w-$c-$p' >> $@_; \ + echo 'ctr = $$(words $${experiments})' >> $@_; \ + echo '$w-$c-$p: input-type = 1' 
>> $@_; \ + echo '$w-$c-$p: eval-sets = $w-$c-$p.eval' >> $@_; \ + echo '$w-$c-$p: tune-src = input/$w-$c-$p.tune.cfn' >> $@_; \ + echo '$w-$c-$p: tune-ref = ${tune-ref-ready}' >> $@_; \ + echo '$w-$c-$p: $w-$c-$p.eval-src = input/$w-$c-$p.eval.cfn' >> $@_; \ + echo '$w-$c-$p: $w-$c-$p.eval-ref = ${eval-ref-ready}' >> $@_; \ + echo '$w-$c-$p: evaluation/$${ctr}/report' >> $@_; \ + echo >> $@_; \ + echo 'evaluation/$${ctr}/$w-$c-$p.eval.output: input = input/$w-$c-$p.eval.cfn' >> $@_; \ + echo 'evaluation/$${ctr}/$w-$c-$p.eval.output: input/$w-$c-$p.eval.cfn' >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/$w-$c-$p.eval.output: ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/$w-$c-$p.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/$w-$c-$p.eval.output: moses_ini := ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/$w-$c-$p.eval.output: moses_ini := tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo 'evaluation/$${ctr}/$w-$c-$p.eval.multi-bleu: $${$w-$c-$p.eval-ref}' >> $@_; \ + echo >> $@_;\ + $(foreach d, tune eval, \ + echo 'cfn-targets += input/$w-$c-$p.$d.cfn' >> $@_; \ + echo 'input/$w-$c-$p.$d.cfn: input/$p.$d.tc' >> $@_; \ + printf '\t@mkdir $$@.lock\n\t@echo $$(call lockline) > $$@.lock/owner\n' >> $@_; \ + printf '\tcreate-confusion-network.01.exe -q -w $w -s csets/csets.$c.txt -c ../mm/fr < $$< > $$@_ && mv $$@_ $$@\n' >> $@_;\ + printf '\t@rm $$@.lock/owner\n\t@rmdir $$@.lock\n' >> $@_;)))) + echo '.PHONY += $$(experiments) cfn' >> $@_ + echo 'cfns: $${cfn-targets}' >> $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock + + + +# # echo 'ctr = $$(words $${experiments})' >> $@_; \ + # echo 'eval-sets = $w-$c-$p.eval' >> $@_; \ + # echo 'rx := $$(call report-prereqs,$${ctr},$${eval-sets})' >> $@_; \ + # echo '$w-$c-$p: run-id := $${ctr}' >> $@_; \ + # echo '$w-$c-$p: tune-input = input/$w-$c-$p.tune.cfn' >> $@_; \ + # echo '$w-$c-$p: tune-src = 
input/$w-$c-$p.tune.cfn' >> $@_; \ + # echo '$w-$c-$p: tune-ref = ${tune-ref-ready}' >> $@_; \ + # echo '$w-$c-$p: $w-$c-$p.eval-src = input/$w-$c-$p.eval.cfn' >> $@_; \ + # echo '$w-$c-$p: $w-$c-$p.eval-ref = ${eval-ref-ready}' >> $@_; \ + # echo '$w-$c-$p: input-type = 1' >> $@_; \ + # echo '$w-$c-$p: mert.options += $$(if $$(findstring uniform,$w),${fixed-iweight})' >> $@_; \ + # echo '$w-$c-$p: evaluation/report.$${ctr}' >> $@_; \ + # echo >> $@_; \ + # echo 'evaluation/$w-$c-$p.eval.output.$${ctr}: input = input/$w-$c-$p.eval.cfn' >> $@_; \ + # echo >> $@_; \ + # $(foreach d, tune eval, \ + # ofile=input/$w-$c-$p.$d.cfn; \ + # ifile=input/$p.$d.tc; \ + # echo "$$ofile: $$ifile" >> $@_ ; \ + # printf '\t create-confusion-network.01.exe -w $w -s csets/cset.$c.txt -c ../mm/fr < $$< > $$@_ && mv $$@_ $$@\n' >> $@_ ; \ + # echo >> $@_; )))) + # echo '.PHONY += $$(experiments)' >> $@_ + # @mv $@_ $@ + # @rm $@.lock/owner + # @rmdir $@.lock diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/skip-steps.mak b/mosesdecoder/contrib/m4m/modules/obsolete/skip-steps.mak new file mode 100644 index 0000000000000000000000000000000000000000..1eb38aa5f0d3954b9edeb864380c33bb7f2b044f --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/skip-steps.mak @@ -0,0 +1,19 @@ +# -*- Makefile -*- + +# Specify in this file resources that you already have +run_id ?= 0 + +untuned_moses_ini := model/moses.ini.0 +moses_ini_for_tuning = ${untuned_moses_ini} +moses_ini_for_eval = ${tuned_moses_ini} + +# Notes: +# +# - if ${moses_ini_for_tuning} is different from ${untuned_mose_ini}, the phrase table and the +# lexical distortion table will be filtered for tuning (see tune.make) +# - if ${moses_ini_for_eval} is different from ${tuned_mose_ini}, the phrase table and the +# lexical distortion table will be filtered for evaluation (see eval.make) + + +all: + echo ";$(foo);" diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/system.m4m 
b/mosesdecoder/contrib/m4m/modules/obsolete/system.m4m new file mode 100644 index 0000000000000000000000000000000000000000..f3cf179ae18c9b63c0f990340526806c04e52659 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/system.m4m @@ -0,0 +1,38 @@ +# -*- Makefile -*- + +# This module defines the actual system + +# Choose names for translation and distortion model +ptable = model/tm/ptable.${L1}-${L2} +dtable = model/dm/dtable.${L1}-${L2} + +# specify the underlying corpus +pll.txt1 ?= crp/trn/aln/${word-alignment}/${L1}.txt.gz +pll.txt2 ?= crp/trn/aln/${word-alignment}/${L2}.txt.gz +pll.aln ?= crp/trn/aln/${word-alignment}/${L1}-${L2}.symal.gz + +# specify the distortion model parameters; we bunch them +# all together in one string +${ptable}: dmodels = wbe-mslr-bidirectional-fe-allff + +# phrase table parameters: maximum phrase length and smoothing +ptable.max-phrase-length = 7 +ptable.smoothing = --GoodTuring + +#$(info $(call add_binary_phrase_table,0,0,5,${ptable},info)) +$(eval $(call add_binary_phrase_table,0,0,5,${ptable})) + +$(eval $(call add_binary_reordering_table,\ +0-0,wbe-mslr-bidirectional-fe-allff,6,${dtable},${ptable})) + +$(info $(call add_binary_reordering_table,\ +0-0,wbe-mslr-bidirectional-fe-allff,6,${dtable},${ptable},info)) + +# below: moses.ini.0 is the moses ini file PRE-TUNING! 
+define build_system +$1/moses.ini.0 + + +makefile: + $(info $(call add_binary_phrase_table,0,0,5,${ptable},info)) + diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/template.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/template.m4m new file mode 100644 index 0000000000000000000000000000000000000000..55a3307244b8026c3118161c4c8a50e1235626a6 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/template.m4m @@ -0,0 +1,66 @@ +# -*- Makefile -*- + +define setup = + echo 'experiments := ' >> $@_; \ + $(foreach p, ${PREPROC}, \ + echo '# NEW EXPERIMENT #####################################' >> $@_; \ + echo 'experiments += ${tag}' >> $@_; \ + echo 'ctr = $$(words $${experiments})' >> $@_; \ + echo '$: input-type = $(2)' >> $@_; \ + echo '${tag}: eval-sets = ${tag}.eval' >> $@_; \ + echo '${tag}: tune-src = input/${tag}.tune.tc' >> $@_; \ + echo '${tag}: tune-ref = ${tune-ref-ready}' >> $@_; \ + echo '${tag}: ${tag}.eval-src = input/${tag}.eval.$(if $(findstring 1,$(2),cfn,tc))' >> $@_; \ + echo '${tag}: ${tag}.eval-ref = ${eval-ref-ready}' >> $@_; \ + echo '${tag}: evaluation/$${ctr}/report' >> $@_; \ + $(foreach e, ${tag}.eval, \ + $(foreach m, ${eval-metrics}, \ + echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.$m' >> $@_;) \ + echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.analysis' >> $@_;) \ + echo >> $@_; \ + echo 'evaluation/$${ctr}/${tag}.eval.output: input = input/${tag}.eval.tc' >> $@_; \ + echo 'evaluation/$${ctr}/${tag}.eval.output: input/${tag}.eval.tc' >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/${tag}.eval.output: ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/${tag}.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo 'evaluation/$${ctr}/${tag}.eval.multi-bleu: $${${tag}.eval-ref}' >> $@_; \ + echo >> $@_;) + echo '.PHONY += $$(experiments)' >> $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock + + +# $(1): system / input processing +# $(2): input type (cfn or text) 
+define setup_experiment = + echo 'experiments := ' >> $@_; \ + $(foreach p, ${PREPROC}, \ + echo '# NEW EXPERIMENT #####################################' >> $@_; \ + echo 'experiments += $(1)' >> $@_; \ + echo 'ctr = $$(words $${experiments})' >> $@_; \ + echo '$(1): input-type = $(2)' >> $@_; \ + echo '$(1): eval-sets = $(1).eval' >> $@_; \ + echo '$(1): tune-src = input/$(1).tune.tc' >> $@_; \ + echo '$(1): tune-ref = ${tune-ref-ready}' >> $@_; \ + echo '$(1): $(1).eval-src = input/$(1).eval.$(if $(findstring 1,$(2),cfn,tc))' >> $@_; \ + echo '$(1): $(1).eval-ref = ${eval-ref-ready}' >> $@_; \ + echo '$(1): evaluation/$${ctr}/report' >> $@_; \ + $(foreach e, $(1).eval, \ + $(foreach m, ${eval-metrics}, \ + echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.$m' >> $@_;) \ + echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.analysis' >> $@_;) \ + echo >> $@_; \ + echo 'evaluation/$${ctr}/$(1).eval.output: input = input/$(1).eval.tc' >> $@_; \ + echo 'evaluation/$${ctr}/$(1).eval.output: input/$(1).eval.tc' >> $@_; \ + echo $(if $(findstring ini,${tuned_moses_ini}), \ + 'evaluation/$${ctr}/$(1).eval.output: ${tuned_moses_ini}', \ + 'evaluation/$${ctr}/$(1).eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \ + echo 'evaluation/$${ctr}/$(1).eval.multi-bleu: $${$(1).eval-ref}' >> $@_; \ + echo >> $@_;) + echo '.PHONY += $$(experiments)' >> $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock + +endef diff --git a/mosesdecoder/contrib/m4m/modules/obsolete/tune.m4m b/mosesdecoder/contrib/m4m/modules/obsolete/tune.m4m new file mode 100644 index 0000000000000000000000000000000000000000..1087de072cc4c8cd217f4cf39450d00bee6aa432 --- /dev/null +++ b/mosesdecoder/contrib/m4m/modules/obsolete/tune.m4m @@ -0,0 +1,45 @@ +# -*- Makefile -*- +# make module for tuning a system + +#tune.input ?= $(error missing +#tuned_moses_ini ?= tuning/moses.ini.${run_id} +#$(tuned_moses_ini): | ${untuned_moses_ini} +#$(tuned_moses_ini): | ${untuned_moses_ini} + +# make sure that 
all necessary variables are set +untuned_moses_ini ?= $(error Fatal error: the required variable untuned_moses_ini is not set) +tuning_input ?= $(error Fatal error: the required variable tuning_input is not set) +tuning_reference ?= $(error Fatal error: the required variable tuning_reference is not set) +tuning_itype ?= $(error Fatal error: the required variable tuning_itype is not set) +tuning_wdir ?= $(error Fatal error: the required variable tuning_wdir is not set) + +$tuning_root_dir ?= ${MOSES_ROOT} + + + +# default tuning parameters +mert.nbest ?= 100 +mert.decoder-threads ?= 4 +tuning/%/tmp/moses.ini: mertcmd = +tuning/%/tmp/moses.ini: mert_flags += --working-dir $(CURDIR)/tuning/$*/tmp +tuning/%/tmp/moses.ini: mert_flags += --decoder-flags "${mert.decoder_flags} -inputtype ${input-type}" +tuning/%/tmp/moses.ini: mert_flags += --rootdir ${MOSES_ROOT}/scripts +tuning/%/tmp/moses.ini: mert_flags += --mertdir ${MOSES_BIN} +tuning/%/tmp/moses.ini: mert_flags += ${mert.options} +tuning/%/tmp/moses.ini: ${untuned_moses_ini} + $(info TUNING: ${tune} ${tune-src} ${tune-ref} ${decode} ${untuned_moses_ini} ${mert_flags}) + @mkdir -p $(@D) + @mkdir $@.lock + @echo $(call lockline) > $@.lock/owner + ${tune} ${mert_flags} ${tune-src} ${tune-ref} ${decode} ${untuned_moses_ini} + @rm $@.lock/owner + @rmdir $@.lock + +tuning/%/moses.tuned.ini: tuning/%/tmp/moses.ini + @mkdir -p $(@D) + @mkdir $@.lock + @echo $(call lockline) > $@.lock/owner + ${apply-weights} tuning/$*/tmp/moses.ini < ${untuned_moses_ini} > $@_ + @mv $@_ $@ + @rm $@.lock/owner + @rmdir $@.lock diff --git a/mosesdecoder/contrib/m4m/scripts/fast-align2bal.py b/mosesdecoder/contrib/m4m/scripts/fast-align2bal.py new file mode 100644 index 0000000000000000000000000000000000000000..1408d20030920038636486d0c406bf7fbfbfec26 --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/fast-align2bal.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# Auxiliary script to convert fast_align output to the "bal" input format +# that 
symal requires. +# Script by Ulrich Germann. + +# command line args: +# +# +# TO DO: - proper argument parsing with getopt +# - help text + +import sys,os + +(T1,T2,fwd,bwd) = [open(x) for x in sys.argv[1:]] + +def alnvec(slen,alinks,mode): + d = dict([[int(x[mode]),int(x[(mode+1)%2])+1] for x + in [y.split('-') for y in alinks]]) + return [d.get(i,0) for i in xrange(slen)] + +ctr = 0 +for t1 in T1: + t1 = t1.strip().split() + t2 = T2.readline().strip().split() + a1 = alnvec(len(t1),bwd.readline().split(),0) + a2 = alnvec(len(t2),fwd.readline().split(),1) + print 1 + print len(t2), " ".join(t2), '#', " ".join(["%d"%x for x in a2]) + print len(t1), " ".join(t1), '#', " ".join(["%d"%x for x in a1]) + ctr += 1 + pass diff --git a/mosesdecoder/contrib/m4m/scripts/giza.txt2snt.sh b/mosesdecoder/contrib/m4m/scripts/giza.txt2snt.sh new file mode 100644 index 0000000000000000000000000000000000000000..c12359933f0cbf67b8f5eca9700a6116f9a09c20 --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/giza.txt2snt.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Wrapper script around plain2snt that allows us to generate the numberized +# files from gzipped text files via named pipes. 
(c) 2011-2012 Ulrich Germann + +fail() +{ + echo $@ + exit 1 +} + +on_term() +{ + rm $odir/${L1} + rm $odir/${L2} +} + +trap 'on_term' TERM EXIT QUIT INT 0 + +if [ $# -lt 4 ]; then + fail "usage: $0 " +fi + +txtdir=$1 +L1=$2 +L2=$3 +odir=$4 + +mkdir -p $odir +mkfifo $odir/${L1} || exit 1 +mkfifo $odir/${L2} || exit 1 + +find -L ${txtdir} -name "*.${L1}" -or -name "*.${L1}.gz" | sort | xargs zcat -f > $odir/${L1} & +find -L ${txtdir} -name "*.${L2}" -or -name "*.${L2}.gz" | sort | xargs zcat -f > $odir/${L2} & + +pushd $odir +plain2snt ${L1} ${L2} +wait +mv ${L1}_${L2}.snt ${L1}-${L2}.snt +mv ${L2}_${L1}.snt ${L2}-${L1}.snt +wait +popd diff --git a/mosesdecoder/contrib/m4m/scripts/moses.extract-phrases.sh b/mosesdecoder/contrib/m4m/scripts/moses.extract-phrases.sh new file mode 100644 index 0000000000000000000000000000000000000000..5f95fbc5530bb6e7943a2728232a93f806c240ec --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/moses.extract-phrases.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# helper script for phrase extraction +# (c) 2011-2012 Ulrich Germann +# txtdir - directory with gzipped plain text files +# sntdir - directory with files in Giza's .snt format, also including the .OK files +# produced by giza.txt2snt.sh +# gizdir - directory where aligned corpus resides +# L1,L2 - language tags for L1,L2 +# plmax - max phrase length to be extraced + +extractor=$1 +L1_text=$2 +L2_text=$3 +aln=$4 +odir=$5 +max_plen=$6 +dmodel=$7 + + +echo $# +if [ $# -lt 6 ] ; then + echo < " +EOF +exit 1 +fi + +fifo=$odir/fifo.$$ + +cleanup() +{ + if [ -e $fifo ] ; then rm $fifo; fi + if [ -e $fifo.inv ] ; then rm $fifo.inv; fi + if [ -e $fifo.o ] ; then rm $fifo.o; fi +} + +trap 'cleanup' 0 +export LC_ALL=C +mkdir -p $odir/fwd $odir/bwd $odir/dst +mkfifo $fifo +parallel < $fifo -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/fwd/part.{#}.gz" & +mkfifo $fifo.inv +parallel < $fifo.inv -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/bwd/part.{#}.gz" & +if [ "$dmodel" != "" ] ; 
then + mkfifo $fifo.o + parallel < $fifo.o -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/dst/part.{#}.gz" & + dmodel="orientation --model $dmodel" +fi +#echo "($extractor <(zcat -f $L2_text) <(zcat -f $L1_text) <(zcat -f $aln) $fifo $max_plen $dmodel) || exit 1" +($extractor <(zcat -f $L2_text) <(zcat -f $L1_text) <(zcat -f $aln) $fifo $max_plen $dmodel) || exit 1 + +wait + +# for part in fwd bwd dst; do +# echo -n '' > $odir/${part}/sort.batch +# for f in $odir/${part}/part.[0-9][0-9][0-9][0-9].gz; do +# g=`echo $f | sed 's/.gz$//'` +# # echo "f=$g; if [ -e \$f.gz ] ; then zcat \$f.gz | LC_ALL=C sort | gzip > \$f.gz_ && mv \$f.gz_ \$f.sorted.gz && rm \$f.gz; fi" \ +# echo "f=$g; if [ -e \$f.gz ] ; then zcat \$f.gz | LC_ALL=C sort | gzip > \$f.gz_ && mv \$f.gz_ \$f.sorted.gz; fi" \ +# >> $odir/${part}/sort.batch +# done +# done + diff --git a/mosesdecoder/contrib/m4m/scripts/moses.make-lex.py b/mosesdecoder/contrib/m4m/scripts/moses.make-lex.py new file mode 100644 index 0000000000000000000000000000000000000000..686c65c4b30653108658bc80dcf9a353236fe220 --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/moses.make-lex.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +# Quick hack to extract lexica from Giza-Aligned corpus +# (c) 2011 Ulrich Germann + +import sys, os + +D = os.popen("zcat %s" % sys.argv[1]) +E = os.popen("zcat %s" % sys.argv[2]) +A = os.popen("zcat %s" % sys.argv[3]) +d_given_e = sys.argv[4] +e_given_d = sys.argv[5] + +try: + os.makedirs(os.path.dirname(d_given_e)) + os.makedirs(os.path.dirname(e_given_d)) +except: + pass + +WD = ["NULL","UNK"] +WE = ["NULL","UNK"] +VD = {} +VE = {} +JJ = [] +MD = [] +ME = [] + +def id(V,W,x): + i = V.setdefault(x,len(W)) + if i == len(W): W.append(x) + return i + +ctr = 0 +for dline in D: + ctr += 1 + #if ctr % 1000 == 0: sys.stderr.write('.') + eline = E.readline() + aline = A.readline() + d = [id(VD,WD,w) for w in dline.strip().split()] + e = [id(VE,WE,w) for w in eline.strip().split()] + a = [[int(y) for y 
in x.split('-')] for x in aline.split()] + + while len(MD) <= len(VD) + 2: + MD.append(0) + JJ.append({}) + pass + + while len(ME) <= len(VE) + 2: + ME.append(0) + pass + + fd = [0 for i in xrange(len(d))] + fe = [0 for i in xrange(len(e))] + for x,y in a: + fd[x] += 1 + fe[y] += 1 + MD[d[x]] += 1 + ME[e[y]] += 1 + JJ[d[x]][e[y]] = JJ[d[x]].setdefault(e[y],0) + 1 + # print WD[d[x]],WE[e[y]],JJ[d[x]][e[y]] + pass + for i in [d[k] for k in xrange(len(d)) if fd[k] == 0]: + ME[0] += 1 + MD[i] += 1 + JJ[i][0] = JJ[i].setdefault(0,0) + 1 + pass + for i in [e[k] for k in xrange(len(e)) if fe[k] == 0]: + ME[i] += 1 + MD[0] += 1 + JJ[0][i] = JJ[0].setdefault(i,0) + 1 + pass + pass + +ED = os.popen("gzip > %s" % e_given_d, 'w') +DE = os.popen("gzip > %s" % d_given_e, 'w') + +for d in xrange(len(JJ)): + T = JJ[d] + for e,jj in T.items(): + print >>ED, WE[e], WD[d], float(jj)/MD[d] + print >>DE, WD[d], WE[e], float(jj)/ME[e] + pass + pass + +ED.close() +DE.close() diff --git a/mosesdecoder/contrib/m4m/scripts/moses.phrase-extract.sh b/mosesdecoder/contrib/m4m/scripts/moses.phrase-extract.sh new file mode 100644 index 0000000000000000000000000000000000000000..aac0fae7b949bc6cb836c7b43ac361732848e76d --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/moses.phrase-extract.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# Helper script for phrase extraction from a single corpus shard. +# Written by Ulrich Germann. 
+ +# to be added: built-in factor filtering for factored models + +cleanup() +{ + if [ -e $fifo ] ; then rm $fifo; fi + if [ -e $fifo.inv ] ; then rm $fifo.inv; fi + if [ -e $fifo.o ] ; then rm $fifo.o; fi +} + +usage() +{ + echo + echo "$0: wrapper script to extract phrases from word-aligned corpus" + echo -e "usage:\n $0 [-x] " + echo "options:" + echo "-l: maximum phrase length ($plen)" + echo "-m: distortion model specification" + echo "-o: base name for output files .fwd.gz .bwd.gz [..dst.gz]" + echo "-x: (no argument) don't create .fwd.gz and .bwd.gz" + echo + echo "required input files: ..gz ibase..gz ibase..gz" +} + +plen=7 +nottable= +dmodel= +dspec= +pargs= +sfactors= +tfactors= +while [ $# -gt 0 ]; do + case $1 in + -l*) plen=${1#-l} + plen=${plen#=} + if [ -z $plen ] ; then + shift + plen=$1 + fi + ;; + -m*) dmodel=${1#-m} + dmodel=${dmodel#=} + if [ -z $dmodel ] ; then + shift + dmodel="$1" + fi + ;; + -o*) obase=${1#-o} + obase=${obase#=} + if [ -z $obase ] ; then + shift + obase=$1 + fi + ;; + -s*) sfactors=${1#-s} + sfactors=${sfactors#=} + if [ -z $sfactors ] ; then + shift + sfactors = $1 + fi + ;; + -t*) tfactors=${1#-t} + tfactors=${tfactors#=} + if [ -z $tfactors ] ; then + shift + sfactors = $1 + fi + ;; + -x) nottable=1;; + -h) usage; exit 0;; + *) pargs=(${pargs[*]} $1);; + esac + shift +done + +if [ -n "$sfactors" ] || [ -n "$tfactors" ] ; then + echo "Factor filtering is not implemented yet!" 
+ exit 2 +fi + +extract=${pargs[0]} +ibase=${pargs[1]} +L1tag=${pargs[2]} +L2tag=${pargs[3]} +obase=${obase:=$ibase} + +fifo=$obase.$$ +trap 'cleanup' 0 + +export LC_ALL=C +if [ -z "$nottable" ] ; then +mkfifo $fifo; sort -S 5G < $fifo | gzip > $obase.fwd.gz & +mkfifo $fifo.inv; sort -S 5G < $fifo.inv | gzip > $obase.bwd.gz & +fi +if [ -n "$dmodel" ] ; then + mkfifo $fifo.o + sort -S 5G < $fifo.o | gzip > $obase.dst.gz & + dspec="orientation --model " + dspec+=`echo $dmodel | perl -pe 's/((hier|phrase|wbe)-(msd|msrl|mono)).*/$1/;'` +fi + +txt1=${ibase}.${L1tag}.gz +txt2=${ibase}.${L2tag}.gz +aln=${ibase}.aln.gz +echo "($extract <(zcat -f $txt1) <(zcat -f $txt2) <(zcat -f $aln) $fifo $plen $dspec) || exit 1" +($extract <(zcat -f $txt2) <(zcat -f $txt1) <(zcat -f $aln) $fifo $plen $dspec) || exit 1 +wait diff --git a/mosesdecoder/contrib/m4m/scripts/moses.score-phrases.sh b/mosesdecoder/contrib/m4m/scripts/moses.score-phrases.sh new file mode 100644 index 0000000000000000000000000000000000000000..abad3175a1623bbaf0ede3584b39eaa90fc066c9 --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/moses.score-phrases.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Wrapper script around the moses phrase scoring utility. +# Script by Ulrich Germann. Called from within M4M. 
+# +# lexicon given should be +# de-given-en for fwd +# en-given-de for bwd + +binary=$1 +phrases=$2 +lex=$3 +obase=$4 +smoothing=$5 +inv=$6 + +cleanup() +{ + if [ -e $obase.$$ ] ; then rm $obase.$$; fi + if [ -e $obase.$$.coc ] ; then mv $obase.$$.coc $obase.coc; fi +} + +mkfifo $obase.$$ || exit 1 + +trap 'cleanup' 0 + +export LC_ALL=C +if [[ "$inv" == "--Inverse" ]] ; then + parallel --gnu < $obase.$$ -j10 --pipe --blocksize 250M "sort -S 10G | gzip > $obase.{#}.gz" & +else + gzip < $obase.$$ > $obase.scored.gz_ & +fi + +if [[ $phrases != "-" && $phrases != "/dev/stdin" ]] ; then + $binary $phrases <(zcat -f $lex) $obase.$$ $smoothing $inv || exit 1 +else + $binary /dev/stdin <(zcat -f $lex) $obase.$$ $smoothing $inv || exit 1 +fi + +if [ $? ] ; then exit $?; fi +wait +exit $?; diff --git a/mosesdecoder/contrib/m4m/scripts/moses.transfer-weights.py b/mosesdecoder/contrib/m4m/scripts/moses.transfer-weights.py new file mode 100644 index 0000000000000000000000000000000000000000..0b5bc689995491be834f5fe2a51a276484eca78a --- /dev/null +++ b/mosesdecoder/contrib/m4m/scripts/moses.transfer-weights.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +# Combines the system definition from one .ini file with the weights contained +# in another. Works for the new moses.ini format with fully named feature +# functions. Writes the new .ini file to stdout +# Script by Ulrich Germann. + +import re,sys,os +from optparse import OptionParser + +SectionHeaderPattern = re.compile(r'^\[(.*)\]\s*$') +def read_ini(filename): + ''' + Reads a moses.ini file and returns a dictionary mapping + from section names to a list of lines contained in that section. 
+ ''' + AllSections = {} + CurSection = AllSections.setdefault('',[]) + for line in open(filename): + line = line.strip() + m = SectionHeaderPattern.match(line) + if m: + CurSection = AllSections.setdefault(m.group(1),[]) + elif len(line): + CurSection.append(line) + pass + pass + return AllSections + +parser = OptionParser() +parser.add_option("-s", "--system", dest = "system", + help = "moses.ini file defining the system") +parser.add_option("-w", "--weights", dest = "weight", + help = "moses.ini file defining the system") + +opts,args = parser.parse_args() + +system = read_ini(opts.system) +weight = read_ini(opts.weight) + +for s in system: + if len(s) == 0 or s[0:6] == 'weight': continue + print "[%s]"%s + print "\n".join(system[s]) + print + pass + +if 'weight' in weight: + print '[weight]' + print "\n".join(weight['weight']) +else: + for s in weight: + if s[0:6] != 'weight': continue + print "[%s]"%s + print "\n".join(system[s]) + print + pass + pass + + + diff --git a/mosesdecoder/contrib/m4m/util/Jamfile b/mosesdecoder/contrib/m4m/util/Jamfile new file mode 100644 index 0000000000000000000000000000000000000000..21ae132f5c43dc8dfcce83befec06eec766beab2 --- /dev/null +++ b/mosesdecoder/contrib/m4m/util/Jamfile @@ -0,0 +1,12 @@ +external-lib bzip2 ; +external-lib zlib ; + +exe merge-sorted : +merge-sorted.cc +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_program_options +; + + diff --git a/mosesdecoder/contrib/memscore/Makefile.in b/mosesdecoder/contrib/memscore/Makefile.in new file mode 100644 index 0000000000000000000000000000000000000000..fa1f6e9f5fd92639e5b4e03b6e04dacfe2157c65 --- /dev/null +++ b/mosesdecoder/contrib/memscore/Makefile.in @@ -0,0 +1,581 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# memscore - in-memory phrase scoring for Statistical Machine Translation +# Christian Hardmeier, FBK-irst, Trento, 2010 +# $Id$ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = . +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +bin_PROGRAMS = memscore$(EXEEXT) +@IRSTLM_TRUE@am__append_1 = phraselm.cpp phraselm.h +@CHANNEL_SCORER_TRUE@am__append_2 = channel-scorer.cpp channel-scorer.h +subdir = . 
+DIST_COMMON = $(am__configure_deps) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/config.h.in \ + $(top_srcdir)/configure depcomp install-sh missing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_boost_base.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno configure.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +PROGRAMS = $(bin_PROGRAMS) +am__memscore_SOURCES_DIST = datastorage.h memscore.h phrasetable.h \ + scorer.h scorer-impl.h statistic.h timestamp.h phrasetable.cpp \ + memscore.cpp scorer.cpp lexdecom.cpp lexdecom.h phraselm.cpp \ + phraselm.h channel-scorer.cpp channel-scorer.h +@IRSTLM_TRUE@am__objects_1 = phraselm.$(OBJEXT) +@CHANNEL_SCORER_TRUE@am__objects_2 = channel-scorer.$(OBJEXT) +am_memscore_OBJECTS = phrasetable.$(OBJEXT) memscore.$(OBJEXT) \ + scorer.$(OBJEXT) lexdecom.$(OBJEXT) $(am__objects_1) \ + $(am__objects_2) +memscore_OBJECTS = $(am_memscore_OBJECTS) +memscore_DEPENDENCIES = +DEFAULT_INCLUDES = -I. -I$(srcdir) -I. 
+depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(memscore_SOURCES) +DIST_SOURCES = $(am__memscore_SOURCES_DIST) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + { test ! -d $(distdir) \ + || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -fr $(distdir); }; } +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +distuninstallcheck_listfiles = find . -type f -print +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BOOST_CPPFLAGS = @BOOST_CPPFLAGS@ +BOOST_LDFLAGS = @BOOST_LDFLAGS@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHANNEL_SCORER_FALSE = @CHANNEL_SCORER_FALSE@ +CHANNEL_SCORER_TRUE = @CHANNEL_SCORER_TRUE@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +IRSTLM_FALSE = @IRSTLM_FALSE@ +IRSTLM_TRUE = @IRSTLM_TRUE@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = 
@libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +ACLOCAL_AMFLAGS = -I m4 +AUTOMAKE_OPTIONS = foreign +AM_CXXFLAGS = $(BOOST_CPPFLAGS) -Wall -ffast-math -ftrapping-math -fomit-frame-pointer +memscore_SOURCES = datastorage.h memscore.h phrasetable.h scorer.h \ + scorer-impl.h statistic.h timestamp.h phrasetable.cpp \ + memscore.cpp scorer.cpp lexdecom.cpp lexdecom.h \ + $(am__append_1) $(am__append_2) +memscore_LDADD = $(IRSTLM_LIBS) $(GSL_LIBS) +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .cpp .o .obj +am--refresh: + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \ + cd $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) + +config.h: stamp-h1 + @if test ! 
-f $@; then \ + rm -f stamp-h1; \ + $(MAKE) stamp-h1; \ + else :; fi + +stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status config.h +$(srcdir)/config.h.in: $(am__configure_deps) + cd $(top_srcdir) && $(AUTOHEADER) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f config.h stamp-h1 +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +memscore$(EXEEXT): $(memscore_OBJECTS) $(memscore_DEPENDENCIES) + @rm -f memscore$(EXEEXT) + $(CXXLINK) $(memscore_LDFLAGS) $(memscore_OBJECTS) $(memscore_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/channel-scorer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lexdecom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memscore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phraselm.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phrasetable.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scorer.Po@am__quote@ + +.cpp.o: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF 
"$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` +uninstall-info-am: + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + 
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + $(am__remove_distdir) + mkdir $(distdir) + $(mkdir_p) $(distdir)/m4 + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 + $(am__remove_distdir) + +dist-tarZ: distdir + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__remove_distdir) + +dist-shar: distdir + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__remove_distdir) + +dist dist-all: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir); chmod a+w $(distdir) + mkdir $(distdir)/_build + mkdir $(distdir)/_inst + chmod a-w $(distdir) + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && cd $(distdir)/_build \ + && ../configure --srcdir=.. 
--prefix="$$dc_install_base" \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck + $(am__remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e '1{h;s/./=/g;p;x;}' -e '$${p;x;}' +distuninstallcheck: + @cd $(distuninstallcheck_dir) \ + && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) config.h +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(mkdir_p) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-binPROGRAMS + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am am--refresh check check-am clean \ + clean-binPROGRAMS clean-generic ctags dist dist-all dist-bzip2 \ + dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-tags distcleancheck distdir distuninstallcheck dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-info-am + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/mosesdecoder/contrib/memscore/configure.ac b/mosesdecoder/contrib/memscore/configure.ac new file mode 100644 index 0000000000000000000000000000000000000000..23af85df23a20d4beaf6a85fd72cae4952754b17 --- /dev/null +++ b/mosesdecoder/contrib/memscore/configure.ac @@ -0,0 +1,84 @@ +# memscore - in-memory phrase scoring for Statistical Machine Translation +# Christian Hardmeier, FBK-irst, Trento, 2010 +# $Id$ + +# Process this file with autoconf to produce a configure script. + +AC_INIT([memscore], [1.0], [hardmeier at fbk.eu]) +AM_INIT_AUTOMAKE +AC_LANG([C++]) + +AC_ARG_WITH(irstlm, + [AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to the IRSTLM toolkit])], + [with_irstlm=$withval], + [with_irstlm=check]) + +AC_ARG_WITH([gsl], + [AC_HELP_STRING([--with-gsl=PATH], [path to the GSL library])], + [with_gsl=$withval + CPPFLAGS="$CPPFLAGS -I$with_gsl/include" + LDFLAGS="$LDFLAGS -L$with_gsl/lib"], + [with_gsl=check]) + +AC_ARG_ENABLE([channel], + [AC_HELP_STRING([--enable-channel], [feature not yet publicly available])], + [AC_DEFINE(ENABLE_CHANNEL_SCORER, [], [Define to enable channel scorer])], + [enable_channel=no]) + +AC_PREREQ([2.63]) +AC_CONFIG_SRCDIR([memscore.cpp]) +AC_CONFIG_HEADERS([config.h]) + +# Checks for programs. +AC_PROG_CXX +AC_PROG_CC + +# Checks for libraries. 
+AX_BOOST_BASE([1.35.0]) + +AC_CHECK_LIB([m], [cos]) +AC_CHECK_LIB([z], [gzopen]) + +have_gsl=yes +AC_CHECK_LIB([gslcblas],[cblas_dgemm], [], [have_gsl=no]) +AC_CHECK_LIB([gsl],[gsl_blas_dgemm], [], [have_gsl=no]) + +AS_IF([test x$with_irstlm = xcheck], + [AC_CHECK_HEADER([n_gram.h], + [AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])], + [with_irstlm=no])] +, + [SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include" + + AC_CHECK_HEADER(n_gram.h, + [AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])], + [AC_MSG_ERROR([Cannot find IRSTLM!])]) + + MY_ARCH=`uname -m` + LIB_IRSTLM="-lirstlm" + LDFLAGS="$LDFLAGS -L${with_irstlm}/lib/${MY_ARCH}" + LIBS="$LIBS $LIB_IRSTLM" + FMTLIBS="$FMTLIBS libirstlm.a"] +) +AM_CONDITIONAL([IRSTLM], [test x$with_irstlm != xno]) + +AS_IF([test x$enable_channel = xyes], + [AS_IF([test x$with_irstlm = xno || test x$have_gsl = xno], + [AC_MSG_ERROR([The channel scorer needs both GSL and irstlm.])])]) + +# Checks for header files. +#AC_CHECK_HEADERS([fenv.h sys/time.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_TYPE_SIZE_T +AC_CHECK_TYPES([ptrdiff_t]) + +# Checks for library functions. 
+#AC_FUNC_MALLOC +#AC_CHECK_FUNCS([getpagesize gettimeofday]) + +AM_CONDITIONAL(CHANNEL_SCORER, test x$enable_channel = xyes) + +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/mosesdecoder/contrib/memscore/lexdecom.h b/mosesdecoder/contrib/memscore/lexdecom.h new file mode 100644 index 0000000000000000000000000000000000000000..1d87caf66b8bcbc6838c5c636e8826ace8b24cf7 --- /dev/null +++ b/mosesdecoder/contrib/memscore/lexdecom.h @@ -0,0 +1,41 @@ +/* + * File: lexdecom.h + * Author: Felipe Sánchez-Martínez, Universitat d'Alacant + * + * Created on 2010/01/27 + */ + +#ifndef _LEXDECOM_H +#define _LEXDECOM_H + +#include "phrasetable.h" +#include "scorer.h" + +class LexicalDecompositionPhraseScorer : public PhraseScorer +{ +private: + explicit LexicalDecompositionPhraseScorer(PhraseTable &pd, bool reverse, const String &lwfile, + const char *argv[], int &argp, const PhraseScorerFactory &ptf); + + virtual void do_score_phrases(); + virtual Score do_get_score(const PhraseTable::const_iterator &it); + + Score get_weight(const String &s_src, const String &s_tgt) const; + Score get_weight(Count src, Count tgt) const; + + typedef std::map, Score> WeightMapType_; + + WeightMapType_ weight_map_; + + // p(J|I) = probability of source-length J given target-length I + std::map > prob_srclen_tgtlen_; + + Score get_noisy_or_combination(Count src_word, PhraseInfo &tgt_phrase); + + PhraseScorer* black_box_scorer; + +public: + static PhraseScorer *create_scorer(const char *argv[], int &argp, bool reverse, const PhraseScorerFactory &ptf); +}; + +#endif /* _LEXDECOM_H */ diff --git a/mosesdecoder/contrib/memscore/memscore.cpp b/mosesdecoder/contrib/memscore/memscore.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c723b236e51ff34394acba2f4b5b2b9f88214819 --- /dev/null +++ b/mosesdecoder/contrib/memscore/memscore.cpp @@ -0,0 +1,85 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// 
$Id$ + +#include +#include + +#include "phrasetable.h" +#include "scorer.h" + +const char *progname; + +typedef PhrasePairInfo::AlignmentVector::value_type VP; + +bool cmp_counts(const VP &a1, const VP &a2); +int main(int argc, const char *argv[]); + +bool cmp_counts(const VP &a1, const VP &a2) +{ + return a1.second < a2.second; +} + +int main(int argc, const char *argv[]) +{ + progname = argv[0]; + + if(argc == 1) { + std::cerr << "No scorers specified." << std::endl; + usage(); + } + + MemoryPhraseTable pt; + PhraseScorerFactory psf(pt); + + typedef std::vector ScorerList; + ScorerList scorers; + + for(int argp = 1; argp < argc; ) { + bool reverse; + if(!strcmp(argv[argp], "-s")) + reverse = false; + else if(!strcmp(argv[argp], "-r")) + reverse = true; + else + usage(); + + scorers.push_back(psf.create_scorer(argv, ++argp, reverse)); + } + + pt.load_data(std::cin); + pt.compute_phrase_statistics(); + + for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s) + (*s)->score_phrases(); + + for(PhrasePairCounts::const_iterator it = pt.raw_begin(); it != pt.raw_end(); ++it) { + PhrasePairInfo ppi(it); + Phrase src = ppi.get_src(); + Phrase tgt = ppi.get_tgt(); + const PhrasePairInfo::AlignmentVector av = ppi.get_alignments(); + + PhraseAlignment alig = std::max_element(av.begin(), av.end(), cmp_counts)->first; + + std::cout << pt.get_src_phrase(src) << " ||| " << pt.get_tgt_phrase(tgt) << " ||| " << alig << " |||"; + + for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s) + std::cout << ' ' << (*s)->get_score(it); + std::cout << '\n'; // don't use std::endl to avoid flushing + } +} + +void usage() +{ + std::cerr << "Usage: " << progname << " ..." 
<< std::endl << + " where each scorer is specified as" << std::endl << + " -s to estimate p(s|t)" << std::endl << + " -r to estimate p(t|s)" << std::endl << std::endl; + + std::cerr << "Implemented scorers:" << std::endl; + + const std::vector &v = PhraseScorerFactory::scorer_list(); + std::copy(v.begin(), v.end(), std::ostream_iterator(std::cerr, "\n")); + + exit(1); +} diff --git a/mosesdecoder/contrib/memscore/memscore.h b/mosesdecoder/contrib/memscore/memscore.h new file mode 100644 index 0000000000000000000000000000000000000000..9b17691e185bf745e163f52424d626c5a7673f85 --- /dev/null +++ b/mosesdecoder/contrib/memscore/memscore.h @@ -0,0 +1,57 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// $Id$ + +#ifndef MEMSCORE_H +#define MEMSCORE_H + +#include +#include +#include + +#include "config.h" + +#ifndef HAVE_PTRDIFF_T +typedef long ptrdiff_t; +#endif + +#ifdef __GNUC__ +#define NORETURN __attribute__ ((noreturn)) +#else +#define NORETURN +#endif + +void usage() NORETURN; + +typedef double Score; +typedef unsigned int Count; +typedef unsigned int Phrase; +typedef ptrdiff_t DataIndex; +typedef std::pair PhrasePair; +typedef char *PhrasePairData; +typedef std::string String; +typedef std::istringstream IStringStream; + +/* phrasetable.h */ + +class PhraseText; +class PhraseInfo; +class PhraseInfoList; +class PhraseAlignment; +class PhrasePairInfo; +class PhraseTable; + +/* scorer.h */ + +class PhraseScorer; + +/* statistic.h */ + +class PhraseStatistic; + +/* IRSTLM */ + +class lmtable; +class ngram; + +#endif diff --git a/mosesdecoder/contrib/memscore/missing b/mosesdecoder/contrib/memscore/missing new file mode 100644 index 0000000000000000000000000000000000000000..894e786e16c1d0d94dfc08d6b475270fe1418d6a --- /dev/null +++ b/mosesdecoder/contrib/memscore/missing @@ -0,0 +1,360 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. 
+ +scriptversion=2005-06-08.21 + +# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# Originally by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +run=: + +# In the cases where this matters, `missing' is being run in the +# srcdir already. +if test -f configure.ac; then + configure_ac=configure.ac +else + configure_ac=configure.in +fi + +msg="missing on your system" + +case "$1" in +--run) + # Try to run requested program, and just exit if it succeeds. + run= + shift + "$@" && exit 0 + # Exit code 63 means version mismatch. This often happens + # when the user try to use an ancient version of a tool on + # a file that requires a minimum version. In this case we + # we should proceed has if the program had been absent, or + # if --run hadn't been passed. + if test $? = 63; then + run=: + msg="probably too old" + fi + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... 
PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + --run try to run the given command, and emulate it if it fails + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + help2man touch the output file + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + tar try tar, gnutar, gtar, then tar without non-portable flags + yacc create \`y.tab.[ch]', if possible, from existing .[ch] + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + +esac + +# Now exit if we have it, but it failed. Also exit now if we +# don't have it and --version was passed (most likely to detect +# the program). +case "$1" in + lex|yacc) + # Not GNU programs, they don't have --version. + ;; + + tar) + if test -n "$run"; then + echo 1>&2 "ERROR: \`tar' requires --run" + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + exit 1 + fi + ;; + + *) + if test -z "$run" && ($1 --version) > /dev/null 2>&1; then + # We have it, but it failed. + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + # Could not run --version or --help. This is probably someone + # running `$TOOL --version' or `$TOOL --help' to check whether + # $TOOL exists and not knowing $TOOL uses missing. 
+ exit 1 + fi + ;; +esac + +# If it does not exist, or fails to run (possibly an outdated version), +# try to emulate it. +case "$1" in + aclocal*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acinclude.m4' or \`${configure_ac}'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`${configure_ac}'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acconfig.h' or \`${configure_ac}'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." + files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case "$f" in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + autom4te) + echo 1>&2 "\ +WARNING: \`$1' is needed, but is $msg. + You might have modified some files without having the + proper tools for further handling them. + You can get \`$1' as part of \`Autoconf' from any GNU + archive site." 
+ + file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` + test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo "#! /bin/sh" + echo "# Created by GNU Automake missing as a replacement of" + echo "# $ $@" + echo "exit 0" + chmod +x $file + exit 1 + fi + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' $msg. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if [ ! -f y.tab.h ]; then + echo >y.tab.h + fi + if [ ! -f y.tab.c ]; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if [ ! -f lex.yy.c ]; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + help2man) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a dependency of a manual page. You may need the + \`Help2man' package in order for those modifications to take + effect. You can get \`Help2man' from any GNU archive site." 
+ + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` + fi + if [ -f "$file" ]; then + touch $file + else + test -z "$file" || exec >$file + echo ".ab help2man is required to generate this page" + exit 1 + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." + # The file to touch is that specified with -o ... + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + # ... or it is the one specified with @setfilename ... + infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $infile` + # ... or it is derived from the source name (dir/f.texi becomes f.info) + test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info + fi + # If the file does not exist, the user really needs makeinfo; + # let's fail without touching anything. + test -f $file || exit 1 + touch $file + ;; + + tar) + shift + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. + if (gnutar --version > /dev/null 2>&1); then + gnutar "$@" && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar "$@" && exit 0 + fi + firstarg="$1" + if shift; then + case "$firstarg" in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + case "$firstarg" in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. 
+ You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and is $msg. + You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequisites for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/mosesdecoder/contrib/memscore/phraselm.h b/mosesdecoder/contrib/memscore/phraselm.h new file mode 100644 index 0000000000000000000000000000000000000000..62e8f08d40cddf461b1fdff2c8b15eb58e332325 --- /dev/null +++ b/mosesdecoder/contrib/memscore/phraselm.h @@ -0,0 +1,45 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// $Id$ + +#ifndef PHRASELM_H +#define PHRASELM_H + +#include + +#include "memscore.h" +#include "phrasetable.h" +#include "statistic.h" + +class lmtable; + +class PhraseLanguageModel : public PhraseStatistic +{ +protected: + String lmfile_; + Count score_idx_; + + PhraseInfoList *phrase_info_list_; + + void compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world); + +public: + PhraseLanguageModel(String lmfile) : lmfile_(lmfile) {} + + virtual void attach(PhraseInfoList &pilist); + virtual void compute_statistic(); + + virtual Score get_score(PhraseInfo &pi) { + assert(computation_done_); + return pi.data(score_idx_); + } +}; + +class ClosedPhraseLanguageModel : public PhraseLanguageModel +{ +public: + ClosedPhraseLanguageModel(String lmfile) : PhraseLanguageModel(lmfile) {} + virtual void compute_statistic(); +}; + +#endif diff --git 
a/mosesdecoder/contrib/memscore/phrasetable.cpp b/mosesdecoder/contrib/memscore/phrasetable.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f430c289a181dff03cd67b902511bf7f0f6c574 --- /dev/null +++ b/mosesdecoder/contrib/memscore/phrasetable.cpp @@ -0,0 +1,348 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// $Id$ + +#include "phrasetable.h" +#include "statistic.h" +#include "timestamp.h" + +#include +#include +#include +#include + +/* PhraseText */ + +PhraseText::DictionaryType_ PhraseText::dictionary_; +Count PhraseText::last_id_ = 1; + +PhraseText::PhraseText(const String &s) +{ + IStringStream is(s); + while(is.good()) { + String w; + getline(is, w, ' '); + Count *id = boost::fast_pool_allocator::allocate(1); + *id = index_word(w); + word_list_.push_back(id); + } +} + +std::ostream &operator<<(std::ostream &os, const PhraseText &pt) +{ + bool print_space = false; + for(PhraseText::const_string_iterator it = pt.string_begin(); it != pt.string_end(); it++) { + if(print_space) + os << ' '; + else + print_space = true; + + os << *it; + } + + return os; +} + +/* PhraseAlignment */ + +PhraseAlignment::Alignment::AlignmentMapType_ PhraseAlignment::Alignment::alignment_map_; +PhraseAlignment::Alignment::AlignmentVectorType_ PhraseAlignment::Alignment::alignment_vector_; + +PhraseAlignment::Alignment::Alignment(Count slen, Count tlen, const String &alignment) : + slen_(slen), tlen_(tlen), matrix_(slen * tlen, false) +{ + assert(slen_ > 0 && slen_ < 10); + IStringStream is(alignment); + while(is.good()) { + String a; + getline(is, a, ' '); + IStringStream ap(a); + Count s, t; + char dash; + ap >> s >> dash >> t; + assert(s < slen && t < tlen); + assert(dash == '-'); + matrix_[t * slen + s] = true; + } +} + +Count PhraseAlignment::Alignment::index_alignment(Count slen, Count tlen, const String &alignment) +{ + AlignmentTuple_ tup = boost::make_tuple(slen, tlen, 
alignment); + AlignmentMapType_::const_iterator it = alignment_map_.find(tup); + + if(it == alignment_map_.end()) { + const Alignment *pa = new Alignment(slen, tlen, alignment); + Count index = alignment_vector_.size(); + alignment_map_.insert(std::make_pair(tup, index)); + alignment_vector_.push_back(pa); + return index; + } else + return it->second; +} + +std::ostream &operator<<(std::ostream &os, const PhraseAlignment::Alignment &pa) +{ + bool print_space = false; + for(Count i = 0; i < pa.matrix_.size(); i++) { + if(print_space) + os << ' '; + else + print_space = true; + + os << (i / pa.slen_) << '-' << (i % pa.slen_); + } + + return os; +} + +std::ostream &operator<<(std::ostream &os, const PhraseAlignment &pa) +{ + for(Count s = 0; s < pa.get_source_length(); s++) { + os << '('; + bool print_comma = false; + for(Count t = 0; t < pa.get_target_length(); t++) { + if(pa.is_aligned(s, t)) { + if(print_comma) + os << ','; + else + print_comma = true; + + os << t; + } + } + os << ") "; + } + + os << "|||"; + + for(Count t = 0; t < pa.get_target_length(); t++) { + os << " ("; + bool print_comma = false; + for(Count s = 0; s < pa.get_source_length(); s++) { + if(pa.is_aligned(s, t)) { + if(print_comma) + os << ','; + else + print_comma = true; + + os << s; + } + } + os << ')'; + } + + return os; +} + +/* PhrasePairInfo */ + +bool PhrasePairInfo::init_phase_ = true; +Count PhrasePairInfo::data_ncounts_ = COUNT_FREE_IDX; +Count PhrasePairInfo::data_nscores_ = SCORE_FREE_IDX; +const Count PhrasePairInfo::CONTINUATION_BIT = 1 << (std::numeric_limits::digits - 1); + +PhrasePairInfo::PhrasePairInfo(Count src, Count tgt, Count alignment, Count count) : + src_(src), tgt_(tgt), data_(NULL), reverse_(false) +{ + init_phase_ = false; + realloc_data(1); + count_data(COUNT_COUNT_IDX) = count; + Count *aligd = alignment_data(0); + aligd[0] = alignment; + aligd[1] = count; +} + +DataIndex PhrasePairInfo::register_score_data(Count size) +{ + assert(init_phase_); + + Count start = 
data_nscores_; + data_nscores_ += size; + return start; +} + +DataIndex PhrasePairInfo::register_count_data(Count size) +{ + assert(init_phase_); + + Count start = data_ncounts_; + data_ncounts_ += size; + return start; +} + +PhrasePairInfo::AlignmentVector PhrasePairInfo::get_alignments() const +{ + PhrasePairInfo::AlignmentVector vec; + + Count i = 0; + bool last; + do { + const Count *aligd = alignment_data(i++); + last = !(aligd[0] & CONTINUATION_BIT); + Count alig = aligd[0] & ~CONTINUATION_BIT; + vec.push_back(std::make_pair(PhraseAlignment(alig, reverse_), aligd[1])); + } while(!last); + + return vec; +} + +void PhrasePairInfo::add_alignment(Count new_alignment) +{ + Count i = 0; + bool last; + do { + Count *aligd = alignment_data(i++); + last = !(aligd[0] & CONTINUATION_BIT); + Count alig = aligd[0] & ~CONTINUATION_BIT; + if(alig == new_alignment) { + aligd[1]++; + return; + } + } while(!last); + + realloc_data(i + 1); + + Count *last_aligd = alignment_data(i - 1); + last_aligd[0] |= CONTINUATION_BIT; + + Count *this_aligd = alignment_data(i); + this_aligd[0] = new_alignment; + this_aligd[1] = 1; +} + +void PhrasePairInfo::realloc_data(Count nalignments) +{ + static boost::pool<> *pool[3] = { NULL, NULL, NULL }; + + size_t fixed_size = data_nscores_ * sizeof(Score) + data_ncounts_ * sizeof(Count); + size_t new_data_size = fixed_size + COUNTS_PER_ALIGNMENT * nalignments * sizeof(Count); + + PhrasePairData new_data; + if(nalignments <= 3) { + if(!pool[nalignments - 1]) + pool[nalignments - 1] = new boost::pool<>(new_data_size); + + new_data = reinterpret_cast(pool[nalignments - 1]->malloc()); + } else + new_data = new char[new_data_size]; + + if(data_) { + memcpy(new_data, data_, fixed_size); + Count i = 0; + Count *old_aligd, *new_aligd; + do { + assert(i < nalignments); + old_aligd = alignment_data(data_, i); + new_aligd = alignment_data(new_data, i); + new_aligd[0] = old_aligd[0]; + new_aligd[1] = old_aligd[1]; + i++; + } while(old_aligd[0] & 
CONTINUATION_BIT); + if(nalignments <= 4) + pool[nalignments - 2]->free(data_); + else + delete[] data_; + } + + data_ = new_data; +} + +/* PhraseInfoList */ + +Phrase PhraseInfoList::index_phrase(const String &s_phr) +{ + IDMapType_::const_iterator it = idmap_.find(s_phr); + if(it != idmap_.end()) + return it->second; + + PhraseInfo *pi = phrase_info_pool_.construct(data_size_, s_phr); + + list_.push_back(pi); + idmap_[s_phr] = list_.size() - 1; + return idmap_[s_phr]; +} + +DataIndex PhraseInfoList::register_data(Count size) +{ + DataIndex start = data_size_; + data_size_ += size; + return start; +} + +void PhraseInfoList::attach_statistic(PhraseStatistic &s) +{ + statistics_.push_back(&s); + s.attach(*this); +} + +void PhraseInfoList::compute_statistics() +{ + while(!statistics_.empty()) { + statistics_.front()->compute_statistic(); + statistics_.pop_front(); + } +} + +/* PhraseTable */ + +void MemoryPhraseTable::load_data(std::istream &instream) +{ + Count total_count = 0; + + Timestamp t_load; + Count nlines = 1; + String line; + while(getline(instream, line)) { + size_t sep1 = line.find(" ||| "); + if(sep1 == line.npos) { + std::cerr << "Phrase separator not found in: " << line << std::endl; + abort(); + } + size_t sep2 = line.find(" ||| ", sep1 + 1); + String s_src(line, 0, sep1); + String s_tgt(line, sep1 + 5, sep2 - sep1 - 5); + String s_alignment(line, sep2 + 5); + + Phrase src = src_info_.index_phrase(s_src); + Phrase tgt = tgt_info_.index_phrase(s_tgt); + Count alignment = PhraseAlignment::index_alignment(src_info_[src].get_phrase().size(), tgt_info_[tgt].get_phrase().size(), s_alignment); + + src_info_[src].inc_count(); + tgt_info_[tgt].inc_count(); + total_count++; + + PhrasePair stpair(src, tgt); + PhrasePairCounts::iterator it = joint_counts_.find(stpair); + + if(it == joint_counts_.end()) { + src_info_[src].inc_distinct(); + tgt_info_[tgt].inc_distinct(); + joint_counts_.insert(std::make_pair(stpair, PhrasePairInfo(src, tgt, alignment, 
1).get_phrase_pair_data())); + } else { + PhrasePairInfo pi(src, tgt, it->second); + pi.inc_count(); + pi.add_alignment(alignment); + it->second = pi.get_phrase_pair_data(); // may have changed by adding the alignment + } + if(nlines % 50000 == 0) + std:: cerr << "Read " << nlines << " lines in " << (t_load.elapsed_time() / 1000) << " ms." << std::endl; + nlines++; + } +} + +void MemoryPhraseTable::attach_src_statistic(PhraseStatistic &s) +{ + src_info_.attach_statistic(s); +} + +void MemoryPhraseTable::attach_tgt_statistic(PhraseStatistic &s) +{ + tgt_info_.attach_statistic(s); +} + +void MemoryPhraseTable::compute_phrase_statistics() +{ + src_info_.compute_statistics(); + tgt_info_.compute_statistics(); +} diff --git a/mosesdecoder/contrib/memscore/scorer.h b/mosesdecoder/contrib/memscore/scorer.h new file mode 100644 index 0000000000000000000000000000000000000000..332f014c5f387a70839e7d0e28c9ede2a89c4adc --- /dev/null +++ b/mosesdecoder/contrib/memscore/scorer.h @@ -0,0 +1,71 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// $Id$ + +#ifndef SCORER_H +#define SCORER_H + +#include "memscore.h" + +class PhraseScorerFactory +{ +private: + PhraseTable &phrase_table_; + +public: + explicit PhraseScorerFactory(PhraseTable &phrase_table) : + phrase_table_(phrase_table) {} + + PhraseScorer *create_scorer(const char *argv[], int &argp, bool reverse); + + PhraseTable &get_phrase_table() const { + return phrase_table_; + } + + static const std::vector &scorer_list(); +}; + +class PhraseScorer +{ +protected: + PhraseTable &phrase_table_; + bool reverse_; + + explicit PhraseScorer(PhraseTable &pt, bool reverse) : + phrase_table_(!reverse ? 
pt : pt.reverse()), reverse_(reverse) {} + + PhraseTable::iterator get_pair(Phrase src, Phrase tgt) { + PhraseTable::iterator it = phrase_table_.find(std::make_pair(src, tgt)); + assert(it != phrase_table_.end()); + return it; + } + +private: + virtual void do_score_phrases() {} + + virtual Score do_get_score(const PhraseTable::const_iterator &it) = 0; + +public: + virtual ~PhraseScorer() {} + + virtual Score get_discount() {} + + void score_phrases() { + do_score_phrases(); + } + + Score get_score(const PhrasePairCounts::const_iterator &it) { + return do_get_score(phrase_table_.find(it)); + } + + Score get_score(const PhraseTable::const_iterator &it) { + return do_get_score(it); + } + + Score get_score(Phrase src, Phrase tgt) { + PhraseTable::const_iterator it = get_pair(src, tgt); + return do_get_score(it); + } +}; + +#endif diff --git a/mosesdecoder/contrib/memscore/timestamp.h b/mosesdecoder/contrib/memscore/timestamp.h new file mode 100644 index 0000000000000000000000000000000000000000..fadb9cc8b323d43f2bb6a485751f04ab269a451b --- /dev/null +++ b/mosesdecoder/contrib/memscore/timestamp.h @@ -0,0 +1,29 @@ +// memscore - in-memory phrase scoring for Statistical Machine Translation +// Christian Hardmeier, FBK-irst, Trento, 2010 +// $Id$ + +#ifndef TIMESTAMP_H +#define TIMESTAMP_H + +#include + +class Timestamp +{ +private: + struct timeval tv_; + +public: + typedef double time_difference; + + Timestamp() { + gettimeofday(&tv_, NULL); + } + + time_difference elapsed_time() const { + struct timeval tv2; + gettimeofday(&tv2, NULL); + return (tv2.tv_sec - tv_.tv_sec) * 1e6 + (tv2.tv_usec - tv_.tv_usec); + } +}; + +#endif diff --git a/mosesdecoder/contrib/mira/Main.cpp b/mosesdecoder/contrib/mira/Main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..acc2f888689bd839f3b6ddb82c58750380c84959 --- /dev/null +++ b/mosesdecoder/contrib/mira/Main.cpp @@ -0,0 +1,1849 @@ +/*********************************************************************** + Moses 
- factored phrase-based language decoder + Copyright (C) 2010 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef MPI_ENABLE +#include +namespace mpi = boost::mpi; +#endif + +#include "Main.h" +#include "Optimiser.h" +#include "Hildreth.h" +#include "HypothesisQueue.h" +#include "moses/StaticData.h" +#include "moses/ScoreComponentCollection.h" +#include "moses/ThreadPool.h" +#include "mert/BleuScorer.h" +#include "moses/FeatureVector.h" + +#include "moses/FF/WordTranslationFeature.h" +#include "moses/FF/PhrasePairFeature.h" +#include "moses/FF/WordPenaltyProducer.h" +#include "moses/LM/Base.h" +#include "util/random.hh" + +using namespace Mira; +using namespace std; +using namespace Moses; +namespace po = boost::program_options; + +int main(int argc, char** argv) +{ + util::rand_init(); + size_t rank = 0; + size_t size = 1; +#ifdef MPI_ENABLE + mpi::environment env(argc,argv); + mpi::communicator world; + rank = world.rank(); + size = world.size(); +#endif + + bool help; + int verbosity; + string mosesConfigFile; + string inputFile; + vector referenceFiles; + vector mosesConfigFilesFolds, inputFilesFolds, 
referenceFilesFolds; + // string coreWeightFile, startWeightFile; + size_t epochs; + string learner; + bool shuffle; + size_t mixingFrequency; + size_t weightDumpFrequency; + string weightDumpStem; + bool scale_margin; + bool scale_update; + size_t n; + size_t batchSize; + bool distinctNbest; + bool accumulateWeights; + float historySmoothing; + bool scaleByInputLength, scaleByAvgInputLength; + bool scaleByInverseLength, scaleByAvgInverseLength; + float scaleByX; + float slack; + bool averageWeights; + bool weightConvergence; + float learning_rate; + float mira_learning_rate; + float perceptron_learning_rate; + string decoder_settings; + float min_weight_change; + bool normaliseWeights, normaliseMargin; + bool print_feature_values; + bool historyBleu ; + bool sentenceBleu; + bool perceptron_update; + bool hope_fear; + bool model_hope_fear; + size_t hope_n, fear_n; + size_t bleu_smoothing_scheme; + float min_oracle_bleu; + float minBleuRatio, maxBleuRatio; + bool boost; + bool decode_hope, decode_fear, decode_model; + string decode_filename; + bool batchEqualsShard; + bool sparseAverage, dumpMixedWeights, sparseNoAverage; + int featureCutoff; + bool pruneZeroWeights; + bool printFeatureCounts, printNbestWithFeatures; + bool avgRefLength; + bool print_weights, print_core_weights, debug_model, scale_lm, scale_wp; + float scale_lm_factor, scale_wp_factor; + bool kbest; + string moses_src; + float sigmoidParam; + float bleuWeight, bleuWeight_hope, bleuWeight_fear; + bool bleu_weight_lm; + float bleu_weight_lm_factor; + bool l1_regularize, l2_regularize, l1_reg_sparse, l2_reg_sparse; + float l1_lambda, l2_lambda; + bool most_violated, most_violated_reg, all_violated, max_bleu_diff; + bool feature_confidence, signed_counts; + float decay_core, decay_sparse, core_r0, sparse_r0; + float bleu_weight_fear_factor; + bool hildreth; + float add2lm; + + // compute real sentence Bleu scores on complete translations, disable Bleu feature + bool realBleu, disableBleuFeature; + bool 
rescaleSlack; + bool makePairs; + bool debug; + bool reg_on_every_mix; + size_t continue_epoch; + bool modelPlusBleu, simpleHistoryBleu; + po::options_description desc("Allowed options"); + desc.add_options() + ("continue-epoch", po::value(&continue_epoch)->default_value(0), "Continue an interrupted experiment from this epoch on") + ("freq-reg", po::value(®_on_every_mix)->default_value(false), "Regularize after every weight mixing") + ("l1sparse", po::value(&l1_reg_sparse)->default_value(true), "L1-regularization for sparse weights only") + ("l2sparse", po::value(&l2_reg_sparse)->default_value(true), "L2-regularization for sparse weights only") + ("mv-reg", po::value(&most_violated_reg)->default_value(false), "Regularize most violated constraint") + ("most-violated", po::value(&most_violated)->default_value(false), "Add most violated constraint") + ("all-violated", po::value(&all_violated)->default_value(false), "Add all violated constraints") + ("feature-confidence", po::value(&feature_confidence)->default_value(false), "Confidence-weighted learning") + ("signed-counts", po::value(&signed_counts)->default_value(false), "Use signed feature counts for CWL") + ("dbg", po::value(&debug)->default_value(true), "More debug output") + ("make-pairs", po::value(&makePairs)->default_value(true), "Make pairs of hypotheses for 1slack") + ("debug", po::value(&debug)->default_value(true), "More debug output") + ("rescale-slack", po::value(&rescaleSlack)->default_value(false), "Rescale slack in 1-slack formulation") + ("add2lm", po::value(&add2lm)->default_value(0.0), "Add the specified amount to all LM weights") + ("hildreth", po::value(&hildreth)->default_value(false), "Prefer Hildreth over analytical update") + ("model-plus-bleu", po::value(&modelPlusBleu)->default_value(false), "Use the sum of model score and +/- bleu to select hope and fear translations") + ("simple-history-bleu", po::value(&simpleHistoryBleu)->default_value(false), "Simple history Bleu") + + ("bleu-weight", 
po::value(&bleuWeight)->default_value(1.0), "Bleu weight used in decoder objective") + ("bw-hope", po::value(&bleuWeight_hope)->default_value(-1.0), "Bleu weight used in decoder objective for hope") + ("bw-fear", po::value(&bleuWeight_fear)->default_value(-1.0), "Bleu weight used in decoder objective for fear") + + ("core-r0", po::value(&core_r0)->default_value(1.0), "Start learning rate for core features") + ("sparse-r0", po::value(&sparse_r0)->default_value(1.0), "Start learning rate for sparse features") + ("decay-core", po::value(&decay_core)->default_value(0.01), "Decay for core feature learning rate") + ("decay-sparse", po::value(&decay_sparse)->default_value(0.01), "Decay for sparse feature learning rate") + + ("tie-bw-to-lm", po::value(&bleu_weight_lm)->default_value(true), "Make bleu weight depend on lm weight") + ("bw-lm-factor", po::value(&bleu_weight_lm_factor)->default_value(2.0), "Make bleu weight depend on lm weight by this factor") + ("bw-factor-fear", po::value(&bleu_weight_fear_factor)->default_value(1.0), "Multiply fear weight by this factor") + ("accumulate-weights", po::value(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs") + ("average-weights", po::value(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update") + ("avg-ref-length", po::value(&avgRefLength)->default_value(false), "Use average reference length instead of shortest for BLEU score feature") + ("batch-equals-shard", po::value(&batchEqualsShard)->default_value(false), "Batch size is equal to shard size (purely batch)") + ("batch-size,b", po::value(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments") + ("bleu-smoothing-scheme", po::value(&bleu_smoothing_scheme)->default_value(1), "Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1)") + ("boost", po::value(&boost)->default_value(false), "Apply boosting factor to updates on 
misranked candidates") + ("config,f", po::value(&mosesConfigFile), "Moses ini-file") + ("configs-folds", po::value >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold") + ("debug-model", po::value(&debug_model)->default_value(false), "Get best model translation for debugging purposes") + ("decode-hope", po::value(&decode_hope)->default_value(false), "Decode dev input set according to hope objective") + ("decode-fear", po::value(&decode_fear)->default_value(false), "Decode dev input set according to fear objective") + ("decode-model", po::value(&decode_model)->default_value(false), "Decode dev input set according to normal objective") + ("decode-filename", po::value(&decode_filename), "Filename for Bleu objective translations") + ("decoder-settings", po::value(&decoder_settings)->default_value(""), "Decoder settings for tuning runs") + ("distinct-nbest", po::value(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step") + ("dump-mixed-weights", po::value(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights") + ("epochs,e", po::value(&epochs)->default_value(10), "Number of epochs") + ("feature-cutoff", po::value(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features") + ("fear-n", po::value(&fear_n)->default_value(1), "Number of fear translations used") + ("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit") + ("history-bleu", po::value(&historyBleu)->default_value(false), "Use 1best translations to update the history") + ("history-smoothing", po::value(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing") + ("hope-fear", po::value(&hope_fear)->default_value(true), "Use only hope and fear translations for optimisation (not model)") + ("hope-n", po::value(&hope_n)->default_value(2), "Number of hope translations used") + ("input-file,i", 
po::value(&inputFile), "Input file containing tokenised source") + ("input-files-folds", po::value >(&inputFilesFolds), "Input files containing tokenised source, one for each fold") + ("learner,l", po::value(&learner)->default_value("mira"), "Learning algorithm") + ("l1-lambda", po::value(&l1_lambda)->default_value(0.0001), "Lambda for l1-regularization (w_i +/- lambda)") + ("l2-lambda", po::value(&l2_lambda)->default_value(0.01), "Lambda for l2-regularization (w_i * (1 - lambda))") + ("l1-reg", po::value(&l1_regularize)->default_value(false), "L1-regularization") + ("l2-reg", po::value(&l2_regularize)->default_value(false), "L2-regularization") + ("min-bleu-ratio", po::value(&minBleuRatio)->default_value(-1), "Set a minimum BLEU ratio between hope and fear") + ("max-bleu-ratio", po::value(&maxBleuRatio)->default_value(-1), "Set a maximum BLEU ratio between hope and fear") + ("max-bleu-diff", po::value(&max_bleu_diff)->default_value(true), "Select hope/fear with maximum Bleu difference") + ("min-oracle-bleu", po::value(&min_oracle_bleu)->default_value(0), "Set a minimum oracle BLEU score") + ("min-weight-change", po::value(&min_weight_change)->default_value(0.0001), "Set minimum weight change for stopping criterion") + ("mira-learning-rate", po::value(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)") + ("mixing-frequency", po::value(&mixingFrequency)->default_value(10), "How often per epoch to mix weights, when using mpi") + ("model-hope-fear", po::value(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation") + ("moses-src", po::value(&moses_src)->default_value(""), "Moses source directory") + ("nbest,n", po::value(&n)->default_value(30), "Number of translations in n-best list") + ("normalise-weights", po::value(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder") + ("normalise-margin", 
po::value(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1") + ("perceptron-learning-rate", po::value(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate") + ("print-feature-values", po::value(&print_feature_values)->default_value(false), "Print out feature values") + ("print-feature-counts", po::value(&printFeatureCounts)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-nbest-with-features", po::value(&printNbestWithFeatures)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch") + ("print-weights", po::value(&print_weights)->default_value(false), "Print out current weights") + ("print-core-weights", po::value(&print_core_weights)->default_value(true), "Print out current core weights") + ("prune-zero-weights", po::value(&pruneZeroWeights)->default_value(false), "Prune zero-valued sparse feature weights") + ("reference-files,r", po::value >(&referenceFiles), "Reference translation files for training") + ("reference-files-folds", po::value >(&referenceFilesFolds), "Reference translation files for training, one for each fold") + ("kbest", po::value(&kbest)->default_value(true), "Select hope/fear pairs from a list of nbest translations") + + ("scale-by-inverse-length", po::value(&scaleByInverseLength)->default_value(false), "Scale BLEU by (history of) inverse input length") + ("scale-by-input-length", po::value(&scaleByInputLength)->default_value(true), "Scale BLEU by (history of) input length") + ("scale-by-avg-input-length", po::value(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by average input length") + ("scale-by-avg-inverse-length", po::value(&scaleByAvgInverseLength)->default_value(false), "Scale BLEU by average inverse input length") + ("scale-by-x", po::value(&scaleByX)->default_value(0.1), "Scale the BLEU score by value x") + ("scale-lm", 
po::value(&scale_lm)->default_value(true), "Scale the language model feature") + ("scale-factor-lm", po::value(&scale_lm_factor)->default_value(0.5), "Scale the language model feature by this factor") + ("scale-wp", po::value(&scale_wp)->default_value(false), "Scale the word penalty feature") + ("scale-factor-wp", po::value(&scale_wp_factor)->default_value(2), "Scale the word penalty feature by this factor") + ("scale-margin", po::value(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation") + ("sentence-level-bleu", po::value(&sentenceBleu)->default_value(true), "Use a sentences level Bleu scoring function") + ("shuffle", po::value(&shuffle)->default_value(false), "Shuffle input sentences before processing") + ("sigmoid-param", po::value(&sigmoidParam)->default_value(1), "y=sigmoidParam is the axis that this sigmoid approaches") + ("slack", po::value(&slack)->default_value(0.05), "Use slack in optimiser") + ("sparse-average", po::value(&sparseAverage)->default_value(false), "Average weights by the number of processes") + ("sparse-no-average", po::value(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum") + ("stop-weights", po::value(&weightConvergence)->default_value(true), "Stop when weights converge") + ("verbosity,v", po::value(&verbosity)->default_value(0), "Verbosity level") + ("weight-dump-frequency", po::value(&weightDumpFrequency)->default_value(2), "How often per epoch to dump weights (mpi)") + ("weight-dump-stem", po::value(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights"); + + po::options_description cmdline_options; + cmdline_options.add(desc); + po::variables_map vm; + po::store(po::command_line_parser(argc, argv). 
options(cmdline_options).run(), vm); + po::notify(vm); + + if (help) { + std::cout << "Usage: " + string(argv[0]) + + " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl; + std::cout << desc << std::endl; + return 0; + } + + const StaticData &staticData = StaticData::Instance(); + + bool trainWithMultipleFolds = false; + if (mosesConfigFilesFolds.size() > 0 || inputFilesFolds.size() > 0 || referenceFilesFolds.size() > 0) { + if (rank == 0) + cerr << "Training with " << mosesConfigFilesFolds.size() << " folds" << endl; + trainWithMultipleFolds = true; + } + + if (dumpMixedWeights && (mixingFrequency != weightDumpFrequency)) { + cerr << "Set mixing frequency = weight dump frequency for dumping mixed weights!" << endl; + exit(1); + } + + if ((sparseAverage || sparseNoAverage) && averageWeights) { + cerr << "Parameters --sparse-average 1/--sparse-no-average 1 and --average-weights 1 are incompatible (not implemented)" << endl; + exit(1); + } + + if (trainWithMultipleFolds) { + if (!mosesConfigFilesFolds.size()) { + cerr << "Error: No moses ini files specified for training with folds" << endl; + exit(1); + } + + if (!inputFilesFolds.size()) { + cerr << "Error: No input files specified for training with folds" << endl; + exit(1); + } + + if (!referenceFilesFolds.size()) { + cerr << "Error: No reference files specified for training with folds" << endl; + exit(1); + } + } else { + if (mosesConfigFile.empty()) { + cerr << "Error: No moses ini file specified" << endl; + return 1; + } + + if (inputFile.empty()) { + cerr << "Error: No input file specified" << endl; + return 1; + } + + if (!referenceFiles.size()) { + cerr << "Error: No reference files specified" << endl; + return 1; + } + } + + // load input and references + vector inputSentences; + size_t inputSize = trainWithMultipleFolds? inputFilesFolds.size(): 0; + size_t refSize = trainWithMultipleFolds? 
referenceFilesFolds.size(): referenceFiles.size(); + vector > inputSentencesFolds(inputSize); + vector > referenceSentences(refSize); + + // number of cores for each fold + size_t coresPerFold = 0, myFold = 0; + if (trainWithMultipleFolds) { + if (mosesConfigFilesFolds.size() > size) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + coresPerFold = size/mosesConfigFilesFolds.size(); + if (size % coresPerFold > 0) { + cerr << "Number of cores has to be a multiple of the number of folds" << endl; + exit(1); + } + + if (rank == 0) + cerr << "Number of cores per fold: " << coresPerFold << endl; + myFold = rank/coresPerFold; + cerr << "Rank " << rank << ", my fold: " << myFold << endl; + } + + // NOTE: we do not actually need the references here, because we are reading them in from StaticData + if (trainWithMultipleFolds) { + if (!loadSentences(inputFilesFolds[myFold], inputSentencesFolds[myFold])) { + cerr << "Error: Failed to load input sentences from " << inputFilesFolds[myFold] << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading inputs from " << inputFilesFolds[myFold] << endl); + + if (!loadSentences(referenceFilesFolds[myFold], referenceSentences[myFold])) { + cerr << "Error: Failed to load reference sentences from " << referenceFilesFolds[myFold] << endl; + exit(1); + } + if (referenceSentences[myFold].size() != inputSentencesFolds[myFold].size()) { + cerr << "Error: Input file length (" << inputSentencesFolds[myFold].size() << ") != (" + << referenceSentences[myFold].size() << ") reference file length (rank " << rank << ")" << endl; + exit(1); + } + VERBOSE(1, "Rank " << rank << " reading references from " << referenceFilesFolds[myFold] << endl); + } else { + if (!loadSentences(inputFile, inputSentences)) { + cerr << "Error: Failed to load input sentences from " << inputFile << endl; + return 1; + } + + for (size_t i = 0; i < referenceFiles.size(); ++i) { + if (!loadSentences(referenceFiles[i], 
referenceSentences[i])) { + cerr << "Error: Failed to load reference sentences from " + << referenceFiles[i] << endl; + return 1; + } + if (referenceSentences[i].size() != inputSentences.size()) { + cerr << "Error: Input file length (" << inputSentences.size() << ") != (" + << referenceSentences[i].size() << ") length of reference file " << i + << endl; + return 1; + } + } + } + + if (scaleByAvgInputLength || scaleByInverseLength || scaleByAvgInverseLength) + scaleByInputLength = false; + + if (historyBleu || simpleHistoryBleu) { + sentenceBleu = false; + cerr << "Using history Bleu. " << endl; + } + + if (kbest) { + realBleu = true; + disableBleuFeature = true; + cerr << "Use kbest lists and real Bleu scores, disable Bleu feature.." << endl; + } + + // initialise Moses + // add references to initialize Bleu feature + boost::trim(decoder_settings); + decoder_settings += " -mira -n-best-list - " + boost::lexical_cast(n) + " distinct"; + + vector decoder_params; + boost::split(decoder_params, decoder_settings, boost::is_any_of("\t ")); + + // bleu feature + decoder_params.push_back("-feature-add"); + + decoder_settings = "BleuScoreFeature tuneable=false references="; + if (trainWithMultipleFolds) { + decoder_settings += referenceFilesFolds[myFold]; + } else { + decoder_settings += referenceFiles[0]; + for (size_t i=1; i < referenceFiles.size(); ++i) { + decoder_settings += ","; + decoder_settings += referenceFiles[i]; + } + } + decoder_params.push_back(decoder_settings); + + string configFile = trainWithMultipleFolds? 
mosesConfigFilesFolds[myFold] : mosesConfigFile; + VERBOSE(1, "Rank " << rank << " reading config file from " << configFile << endl); + MosesDecoder* decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params); + decoder->setBleuParameters(disableBleuFeature, sentenceBleu, scaleByInputLength, scaleByAvgInputLength, + scaleByInverseLength, scaleByAvgInverseLength, + scaleByX, historySmoothing, bleu_smoothing_scheme, simpleHistoryBleu); + bool chartDecoding = staticData.IsChart(); + + // Optionally shuffle the sentences + vector order; + if (trainWithMultipleFolds) { + for (size_t i = 0; i < inputSentencesFolds[myFold].size(); ++i) { + order.push_back(i); + } + } else { + if (rank == 0) { + for (size_t i = 0; i < inputSentences.size(); ++i) { + order.push_back(i); + } + } + } + + // initialise optimizer + Optimiser* optimiser = NULL; + if (learner == "mira") { + if (rank == 0) { + cerr << "Optimising using Mira" << endl; + cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl; + if (normaliseMargin) + cerr << "sigmoid parameter: " << sigmoidParam << endl; + } + optimiser = new MiraOptimiser(slack, scale_margin, scale_update, boost, normaliseMargin, sigmoidParam); + learning_rate = mira_learning_rate; + perceptron_update = false; + } else if (learner == "perceptron") { + if (rank == 0) { + cerr << "Optimising using Perceptron" << endl; + } + optimiser = new Perceptron(); + learning_rate = perceptron_learning_rate; + perceptron_update = true; + model_hope_fear = false; // mira only + hope_fear = false; // mira only + n = 1; + hope_n = 1; + fear_n = 1; + } else { + cerr << "Error: Unknown optimiser: " << learner << endl; + return 1; + } + + // resolve parameter dependencies + if (batchSize > 1 && perceptron_update) { + batchSize = 1; + cerr << "Info: Setting batch size to 1 for perceptron update" << endl; + } + + if (hope_n == 0) + hope_n = n; + if (fear_n == 0) + fear_n = n; + + if (model_hope_fear || kbest) + 
hope_fear = false; // is true by default + if (learner == "mira" && !(hope_fear || model_hope_fear || kbest)) { + cerr << "Error: Need to select one of parameters --hope-fear/--model-hope-fear/--kbest for mira update." << endl; + return 1; + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // Create shards according to the number of processes used + vector shard; + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size() / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << " Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + + // get reference to feature functions + // const vector &featureFunctions = FeatureFunction::GetFeatureFunctions(); + ScoreComponentCollection initialWeights = decoder->getWeights(); + + if (add2lm != 0) { + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const 
StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = initialWeights.GetScoreForProducer(lm) + add2lm; + initialWeights.Assign(lm, lmWeight); + cerr << "Rank " << rank << ", add " << add2lm << " to lm weight." << endl; + } + } + } + + if (normaliseWeights) { + initialWeights.L1Normalise(); + cerr << "Rank " << rank << ", normalised initial weights: " << initialWeights << endl; + } + + decoder->setWeights(initialWeights); + + // set bleu weight to twice the size of the language model weight(s) + if (bleu_weight_lm) { + float lmSum = 0; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + lmSum += abs(initialWeights.GetScoreForProducer(lm)); + } + } + + bleuWeight = lmSum * bleu_weight_lm_factor; + if (!kbest) cerr << "Set bleu weight to lm weight * " << bleu_weight_lm_factor << endl; + } + + // bleu weights can be set separately for hope and fear; otherwise they are both set to 'lm weight * bleu_weight_lm_factor' + if (bleuWeight_hope == -1) { + bleuWeight_hope = bleuWeight; + } + if (bleuWeight_fear == -1) { + bleuWeight_fear = bleuWeight; + } + bleuWeight_fear *= bleu_weight_fear_factor; + if (!kbest) { + cerr << "Bleu weight: " << bleuWeight << endl; + cerr << "Bleu weight fear: " << bleuWeight_fear << endl; + } + + if (decode_hope || decode_fear || decode_model) { + size_t decode = 1; + if (decode_fear) decode = 2; + if (decode_model) decode = 3; + decodeHopeOrFear(rank, size, decode, decode_filename, inputSentences, decoder, n, bleuWeight); + } + + //Main loop: + ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average + ScoreComponentCollection cumulativeWeightsBinary; + size_t numberOfUpdates = 0; + size_t numberOfUpdatesThisEpoch = 0; + + time_t 
now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + float avgInputLength = 0; + float sumOfInputs = 0; + size_t numberOfInputs = 0; + + ScoreComponentCollection mixedWeights; + ScoreComponentCollection mixedWeightsPrevious; + ScoreComponentCollection mixedWeightsBeforePrevious; + ScoreComponentCollection mixedAverageWeights; + ScoreComponentCollection mixedAverageWeightsPrevious; + ScoreComponentCollection mixedAverageWeightsBeforePrevious; + + bool stop = false; +// int sumStillViolatedConstraints; + float epsilon = 0.0001; + + // Variables for feature confidence + ScoreComponentCollection confidenceCounts, mixedConfidenceCounts, featureLearningRates; + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); //initialise core learning rates + cerr << "Initial learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + + for (size_t epoch = continue_epoch; epoch < epochs && !stop; ++epoch) { + if (shuffle) { + if (trainWithMultipleFolds || rank == 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", shuffling input sentences.." 
<< endl; + RandomIndex rindex; + random_shuffle(order.begin(), order.end(), rindex); + } + +#ifdef MPI_ENABLE + if (!trainWithMultipleFolds) + mpi::broadcast(world, order, 0); +#endif + + // redo shards + if (trainWithMultipleFolds) { + size_t shardSize = order.size()/coresPerFold; + size_t shardStart = (size_t) (shardSize * (rank % coresPerFold)); + size_t shardEnd = shardStart + shardSize; + if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + batchSize = 1; + } else { + size_t shardSize = order.size()/size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = order.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Shard size: " << shardSize << endl); + VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + if (batchEqualsShard) + batchSize = shardSize; + } + } + + // sum of violated constraints in an epoch + // sumStillViolatedConstraints = 0; + + numberOfUpdatesThisEpoch = 0; + // Sum up weights over one epoch, final average uses weights from last epoch + if (!accumulateWeights) { + cumulativeWeights.ZeroAll(); + cumulativeWeightsBinary.ZeroAll(); + } + + // number of weight dumps this epoch + size_t weightMixingThisEpoch = 0; + size_t weightEpochDump = 0; + + size_t shardPosition = 0; + vector::const_iterator sid = shard.begin(); + while (sid != shard.end()) { + // feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues) + 
vector > featureValues; + vector > bleuScores; + vector > modelScores; + + // variables for hope-fear/perceptron setting + vector > featureValuesHope; + vector > featureValuesFear; + vector > bleuScoresHope; + vector > bleuScoresFear; + vector > modelScoresHope; + vector > modelScoresFear; + + // get moses weights + ScoreComponentCollection mosesWeights = decoder->getWeights(); + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl); + + if (historyBleu || simpleHistoryBleu) { + decoder->printBleuFeatureHistory(cerr); + } + + // BATCHING: produce nbest lists for all input sentences in batch + vector oracleBleuScores; + vector oracleModelScores; + vector > oneBests; + vector oracleFeatureValues; + vector inputLengths; + vector ref_ids; + size_t actualBatchSize = 0; + + size_t examples_in_batch = 0; + bool skip_example = false; + for (size_t batchPosition = 0; batchPosition < batchSize && sid + != shard.end(); ++batchPosition) { + string input; + if (trainWithMultipleFolds) + input = inputSentencesFolds[myFold][*sid]; + else + input = inputSentences[*sid]; + + Moses::Sentence *sentence = new Sentence(); + stringstream in(input + "\n"); + const vector inputFactorOrder = staticData.GetInputFactorOrder(); + sentence->Read(in,inputFactorOrder); + cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \""; + sentence->Print(cerr); + cerr << "\"" << " (batch pos " << batchPosition << ")" << endl; + size_t current_input_length = (*sentence).GetSize(); + + if (epoch == 0 && (scaleByAvgInputLength || scaleByAvgInverseLength)) { + sumOfInputs += current_input_length; + ++numberOfInputs; + avgInputLength = sumOfInputs/numberOfInputs; + decoder->setAvgInputLength(avgInputLength); + cerr << "Rank " << rank << ", epoch 0, average input length: " << avgInputLength << endl; + } + + vector newFeatureValues; + vector newScores; + if (model_hope_fear) { + featureValues.push_back(newFeatureValues); + 
bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + if (hope_fear || perceptron_update) { + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + if (historyBleu || simpleHistoryBleu || debug_model) { + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + } + } + if (kbest) { + // for decoding + featureValues.push_back(newFeatureValues); + bleuScores.push_back(newScores); + modelScores.push_back(newScores); + + // for storing selected examples + featureValuesHope.push_back(newFeatureValues); + featureValuesFear.push_back(newFeatureValues); + bleuScoresHope.push_back(newScores); + bleuScoresFear.push_back(newScores); + modelScoresHope.push_back(newScores); + modelScoresFear.push_back(newScores); + } + + size_t ref_length; + float avg_ref_length; + + if (print_weights) + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: " << mosesWeights << endl; + if (print_core_weights) { + cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: "; + mosesWeights.PrintCoreFeatures(); + cerr << endl; + } + + // check LM weight + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + float lmWeight = mosesWeights.GetScoreForProducer(lm); + cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight: " << lmWeight << endl; + if (lmWeight <= 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: language model weight should never be <= 0." 
<< endl; + mosesWeights.Assign(lm, 0.1); + cerr << "Rank " << rank << ", epoch " << epoch << ", assign lm weights of 0.1" << endl; + } + } + } + + // select inference scheme + cerr << "Rank " << rank << ", epoch " << epoch << ", real Bleu? " << realBleu << endl; + if (hope_fear || perceptron_update) { + // HOPE + cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n << + "best hope translations" << endl; + vector< vector > outputHope = decoder->getNBest(input, *sid, hope_n, 1.0, bleuWeight_hope, + featureValuesHope[batchPosition], bleuScoresHope[batchPosition], modelScoresHope[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector oracle = outputHope[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + avg_ref_length = ref_length; + float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + // count sparse features occurring in hope translation + featureValuesHope[batchPosition][0].IncrementSparseHopeFeatures(); + + vector bestModel; + if (debug_model || historyBleu || simpleHistoryBleu) { + // MODEL (for updating the history only, using dummy vectors) + cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (debug or history)" << endl; + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + bestModel = outputModel[0]; + decoder->cleanup(chartDecoding); + cerr << endl; + ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + } + + // FEAR + //float fear_length_ratio = 0; + float bleuRatioHopeFear = 0; + //int fearSize = 0; + cerr << "Rank " << rank << ", epoch " << epoch << ", " << fear_n << "best fear translations" << endl; + vector< vector > outputFear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuWeight_fear, + 
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], modelScoresFear[batchPosition], + 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector fear = outputFear[0]; + decoder->cleanup(chartDecoding); + ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + avg_ref_length += ref_length; + avg_ref_length /= 2; + //fear_length_ratio = (float)fear.size()/ref_length; + //fearSize = (int)fear.size(); + cerr << endl; + for (size_t i = 0; i < fear.size(); ++i) + delete fear[i]; + + // count sparse features occurring in fear translation + featureValuesFear[batchPosition][0].IncrementSparseFearFeatures(); + + // Bleu-related example selection + bool skip = false; + bleuRatioHopeFear = bleuScoresHope[batchPosition][0] / bleuScoresFear[batchPosition][0]; + if (minBleuRatio != -1 && bleuRatioHopeFear < minBleuRatio) + skip = true; + if(maxBleuRatio != -1 && bleuRatioHopeFear > maxBleuRatio) + skip = true; + + // sanity check + if (historyBleu || simpleHistoryBleu) { + if (bleuScores[batchPosition][0] > bleuScoresHope[batchPosition][0] && + modelScores[batchPosition][0] > modelScoresHope[batchPosition][0]) { + if (abs(bleuScores[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon && + abs(modelScores[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: MODEL translation better than HOPE translation." << endl; + skip = true; + } + } + if (bleuScoresFear[batchPosition][0] > bleuScores[batchPosition][0] && + modelScoresFear[batchPosition][0] > modelScores[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScores[batchPosition][0]) > epsilon && + abs(modelScoresFear[batchPosition][0] - modelScores[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than MODEL translation." 
<< endl; + skip = true; + } + } + } + if (bleuScoresFear[batchPosition][0] > bleuScoresHope[batchPosition][0]) { + if (abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon) { + // check if it's an error or a warning + skip = true; + if (modelScoresFear[batchPosition][0] > modelScoresHope[batchPosition][0] && abs(modelScoresFear[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) { + cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <getNBest(input, *sid, n, 1.0, bleuWeight_hope, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + //vector oracle = outputHope[0]; + // needed for history + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, oracle.size()); + //float hope_length_ratio = (float)oracle.size()/ref_length; + cerr << endl; + + oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]); + oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]); + oracleModelScores.push_back(modelScores[batchPosition][oraclePos]); + + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], 
bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + // FEAR + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl; + decoder->getNBest(input, *sid, n, -1.0, bleuWeight_fear, + featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition], + 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, fear.size()); + //float fear_length_ratio = (float)fear.size()/ref_length; + + examples_in_batch++; + } + if (kbest) { + // MODEL + cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl; + if (historyBleu || simpleHistoryBleu) { + vector< vector > outputModel = decoder->getNBest(input, *sid, n, 0.0, + bleuWeight, featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + vector bestModel = outputModel[0]; + oneBests.push_back(bestModel); + inputLengths.push_back(current_input_length); + ref_ids.push_back(*sid); + } else { + decoder->getNBest(input, *sid, n, 0.0, bleuWeight, + featureValues[batchPosition], bleuScores[batchPosition], + modelScores[batchPosition], 0, realBleu, distinctNbest, avgRefLength, rank, epoch, ""); + } + decoder->cleanup(chartDecoding); + //ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size()); + //float model_length_ratio = (float)bestModel.size()/ref_length; + cerr << endl; + + examples_in_batch++; + + HypothesisQueue queueHope(hope_n); + HypothesisQueue queueFear(fear_n); + cerr << endl; + if (most_violated || all_violated) { + float bleuHope = -1000; + float bleuFear = 1000; + int 
indexHope = -1; + int indexFear = -1; + + vector bleuHopeList; + vector bleuFearList; + vector indexHopeList; + vector indexFearList; + + if (most_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with most violated constraint" << endl; + else if (all_violated) + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with violated constraints"; + else + cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with hope"; + + // find best hope, then find fear that violates our constraint most + for (size_t i=0; i modelScores[batchPosition][indexHope]) { + if (abs(modelScores[batchPosition][i] - modelScores[batchPosition][indexHope]) > epsilon) { + // better model score + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + } else if (bleuScores[batchPosition][i] > bleuHope) { // better than current best + bleuHope = bleuScores[batchPosition][i]; + indexHope = i; + } + } + + float currentViolation = 0; + for (size_t i=0; i epsilon) && (modelDiff < bleuDiff)) { + float diff = bleuDiff - modelDiff; + if (diff > epsilon) { + if (all_violated) { + cerr << ".. 
adding pair"; + bleuHopeList.push_back(bleuHope); + bleuFearList.push_back(bleuScores[batchPosition][i]); + indexHopeList.push_back(indexHope); + indexFearList.push_back(i); + } else if (most_violated && diff > currentViolation) { + currentViolation = diff; + bleuFear = bleuScores[batchPosition][i]; + indexFear = i; + cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << currentViolation << " (" << modelDiff << " >= " << bleuDiff << ")" << endl; + } + } + } + } + + if (most_violated) { + if (currentViolation > 0) { + cerr << "Rank " << rank << ", epoch " << epoch << ", adding pair with violation " << currentViolation << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << bleuHope << " (" << indexHope << "), fear: " << bleuFear << " (" << indexFear << ")" << endl; + bleuScoresHope[batchPosition].push_back(bleuHope); + bleuScoresFear[batchPosition].push_back(bleuFear); + featureValuesHope[batchPosition].push_back(featureValues[batchPosition][indexHope]); + featureValuesFear[batchPosition].push_back(featureValues[batchPosition][indexFear]); + float modelScoreHope = modelScores[batchPosition][indexHope]; + float modelScoreFear = modelScores[batchPosition][indexFear]; + if (most_violated_reg) { + // reduce model score difference by factor ~0.5 + float reg = currentViolation/4; + modelScoreHope += abs(reg); + modelScoreFear -= abs(reg); + float newViolation = (bleuHope - bleuFear) - (modelScoreHope - modelScoreFear); + cerr << "Rank " << rank << ", epoch " << epoch << ", regularized violation: " << newViolation << endl; + } + modelScoresHope[batchPosition].push_back(modelScoreHope); + modelScoresFear[batchPosition].push_back(modelScoreFear); + + featureValues[batchPosition][indexHope].IncrementSparseHopeFeatures(); + featureValues[batchPosition][indexFear].IncrementSparseFearFeatures(); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", no violated constraint found." 
<< endl; + skip_example = 1; + } + } else cerr << endl; + } + if (max_bleu_diff) { + cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with max Bleu diff from list: " << bleuScores[batchPosition].size() << endl; + for (size_t i=0; i hopeList, fearList; + for (size_t i=0; i > losses(actualBatchSize); + if (model_hope_fear) { + // Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis) + for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) { + for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) { + losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]); + } + } + } + + // set weight for bleu feature to 0 before optimizing + vector::const_iterator iter; + const vector &featureFunctions2 = FeatureFunction::GetFeatureFunctions(); + for (iter = featureFunctions2.begin(); iter != featureFunctions2.end(); ++iter) { + if ((*iter)->GetScoreProducerDescription() == "BleuScoreFeature") { + mosesWeights.Assign(*iter, 0); + break; + } + } + + // scale LM feature (to avoid rapid changes) + if (scale_lm) { + cerr << "scale lm" << endl; + const std::vector &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + for (size_t i = 0; i < statefulFFs.size(); ++i) { + const StatefulFeatureFunction *ff = statefulFFs[i]; + const LanguageModel *lm = dynamic_cast(ff); + + if (lm) { + // scale down score + if (model_hope_fear) { + scaleFeatureScore(lm, scale_lm_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(lm, scale_lm_factor, featureValuesHope, rank, epoch); + scaleFeatureScore(lm, scale_lm_factor, featureValuesFear, rank, epoch); + } + } + } + } + + // scale WP + if (scale_wp) { + // scale up weight + WordPenaltyProducer &wp = WordPenaltyProducer::InstanceNonConst(); + + // scale down score + if (model_hope_fear) { + scaleFeatureScore(&wp, scale_wp_factor, featureValues, rank, epoch); + } else { + scaleFeatureScore(&wp, scale_wp_factor, featureValuesHope, 
rank, epoch); + scaleFeatureScore(&wp, scale_wp_factor, featureValuesFear, rank, epoch); + } + } + + // print out the feature values + if (print_feature_values) { + cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl; + if (model_hope_fear) printFeatureValues(featureValues); + else { + cerr << "hope: " << endl; + printFeatureValues(featureValuesHope); + cerr << "fear: " << endl; + printFeatureValues(featureValuesFear); + } + } + + // apply learning rates to feature vectors before optimization + if (feature_confidence) { + cerr << "Rank " << rank << ", epoch " << epoch << ", apply feature learning rates with decays " << decay_core << "/" << decay_sparse << ": " << featureLearningRates << endl; + if (model_hope_fear) { + applyPerFeatureLearningRates(featureValues, featureLearningRates, sparse_r0); + } else { + applyPerFeatureLearningRates(featureValuesHope, featureLearningRates, sparse_r0); + applyPerFeatureLearningRates(featureValuesFear, featureLearningRates, sparse_r0); + } + } else { + // apply fixed learning rates + cerr << "Rank " << rank << ", epoch " << epoch << ", apply fixed learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl; + if (core_r0 != 1.0 || sparse_r0 != 1.0) { + if (model_hope_fear) { + applyLearningRates(featureValues, core_r0, sparse_r0); + } else { + applyLearningRates(featureValuesHope, core_r0, sparse_r0); + applyLearningRates(featureValuesFear, core_r0, sparse_r0); + } + } + } + + // Run optimiser on batch: + VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl); + size_t update_status = 1; + ScoreComponentCollection weightUpdate; + if (perceptron_update) { + vector > dummy1; + update_status = optimiser->updateWeightsHopeFear( weightUpdate, featureValuesHope, + featureValuesFear, dummy1, dummy1, dummy1, dummy1, learning_rate, rank, epoch); + } else if (hope_fear) { + if (bleuScoresHope[0][0] >= min_oracle_bleu) { + if (hope_n == 1 && fear_n ==1 && batchSize == 1 && 
!hildreth) { + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(weightUpdate, + featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0], + bleuScoresFear[0][0], modelScoresHope[0][0], modelScoresFear[0][0], learning_rate, rank, epoch); + } else + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } else + update_status = 1; + } else if (kbest) { + if (batchSize == 1 && featureValuesHope[0].size() == 1 && !hildreth) { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically( + weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0], + bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0], + modelScoresFear[0][0], learning_rate, rank, epoch); + } else { + cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl; + update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope, + featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope, + modelScoresFear, learning_rate, rank, epoch); + } + } else { + // model_hope_fear + update_status = ((MiraOptimiser*) optimiser)->updateWeights(weightUpdate, + featureValues, losses, bleuScores, modelScores, oracleFeatureValues, + oracleBleuScores, oracleModelScores, learning_rate, rank, epoch); + } + + // sumStillViolatedConstraints += update_status; + + if (update_status == 0) { // if weights were updated + // apply weight update + if (debug) + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << 
weightUpdate << endl; + + if (feature_confidence) { + // update confidence counts based on weight update + confidenceCounts.UpdateConfidenceCounts(weightUpdate, signed_counts); + + // update feature learning rates + featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); + } + + // apply weight update to Moses weights + mosesWeights.PlusEquals(weightUpdate); + + if (normaliseWeights) + mosesWeights.L1Normalise(); + + cumulativeWeights.PlusEquals(mosesWeights); + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + cumulativeWeightsBinary.PlusEquals(binary); + } + + ++numberOfUpdates; + ++numberOfUpdatesThisEpoch; + if (averageWeights) { + ScoreComponentCollection averageWeights(cumulativeWeights); + if (accumulateWeights) { + averageWeights.DivideEquals(numberOfUpdates); + } else { + averageWeights.DivideEquals(numberOfUpdatesThisEpoch); + } + + mosesWeights = averageWeights; + } + + // set new Moses weights + decoder->setWeights(mosesWeights); + //cerr << "Rank " << rank << ", epoch " << epoch << ", new weights: " << mosesWeights << endl; + } + + // update history (for approximate document Bleu) + if (historyBleu || simpleHistoryBleu) { + for (size_t i = 0; i < oneBests.size(); ++i) + cerr << "Rank " << rank << ", epoch " << epoch << ", update history with 1best length: " << oneBests[i].size() << " "; + decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch); + deleteTranslations(oneBests); + } + } // END TRANSLATE AND UPDATE BATCH + + // size of all shards except for the last one + size_t generalShardSize; + if (trainWithMultipleFolds) + generalShardSize = order.size()/coresPerFold; + else + generalShardSize = order.size()/size; + + size_t mixing_base = mixingFrequency == 0 ? 0 : generalShardSize / mixingFrequency; + size_t dumping_base = weightDumpFrequency == 0 ? 
0 : generalShardSize / weightDumpFrequency; + bool mix = evaluateModulo(shardPosition, mixing_base, actualBatchSize); + + // mix weights? + if (mix) { +#ifdef MPI_ENABLE + cerr << "Rank " << rank << ", epoch " << epoch << ", mixing weights.. " << endl; + // collect all weights in mixedWeights and divide by number of processes + mpi::reduce(world, mosesWeights, mixedWeights, SCCPlus(), 0); + + // mix confidence counts + //mpi::reduce(world, confidenceCounts, mixedConfidenceCounts, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } + if (rank == 0) { + // divide by number of processes + if (sparseNoAverage) + mixedWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedWeights.DivideEquals(totalBinary); + else + mixedWeights.DivideEquals(size); + + // divide confidence counts + //mixedConfidenceCounts.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedWeights.L1Normalise(); + } + + ++weightMixingThisEpoch; + + if (pruneZeroWeights) { + size_t pruned = mixedWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from mixedWeights." << endl; + + pruned = cumulativeWeights.PruneZeroWeightFeatures(); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " zero-weighted features pruned from cumulativeWeights." << endl; + } + + if (featureCutoff != -1 && weightMixingThisEpoch == mixingFrequency) { + size_t pruned = mixedWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from mixedWeights." 
<< endl; + + pruned = cumulativeWeights.PruneSparseFeatures(featureCutoff); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << pruned << " features pruned from cumulativeWeights." << endl; + } + + if (weightMixingThisEpoch == mixingFrequency || reg_on_every_mix) { + if (l1_regularize) { + size_t pruned; + if (l1_reg_sparse) + pruned = mixedWeights.SparseL1Regularize(l1_lambda); + else + pruned = mixedWeights.L1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + } + if (l2_regularize) { + if (l2_reg_sparse) + mixedWeights.SparseL2Regularize(l2_lambda); + else + mixedWeights.L2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. on mixedWeights with lambda=" << l2_lambda << endl; + } + } + } + + // broadcast average weights from process 0 + mpi::broadcast(world, mixedWeights, 0); + decoder->setWeights(mixedWeights); + mosesWeights = mixedWeights; + + // broadcast summed confidence counts + //mpi::broadcast(world, mixedConfidenceCounts, 0); + //confidenceCounts = mixedConfidenceCounts; +#endif +#ifndef MPI_ENABLE + //cerr << "\nRank " << rank << ", no mixing, weights: " << mosesWeights << endl; + mixedWeights = mosesWeights; +#endif + } // end mixing + + // Dump weights? 
+ if (trainWithMultipleFolds || weightEpochDump == weightDumpFrequency) { + // dump mixed weights at end of every epoch to enable continuing a crashed experiment + // (for jackknife every time the weights are mixed) + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_mixed_0" << epoch; + else + filename << weightDumpStem << "_mixed_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + mixedWeights.Save(filename.str()); + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + } + if (dumpMixedWeights) { + if (mix && rank == 0 && !weightDumpStem.empty()) { + // dump mixed weights instead of average weights + ostringstream filename; + if (epoch < 10) + filename << weightDumpStem << "_0" << epoch; + else + filename << weightDumpStem << "_" << epoch; + + if (weightDumpFrequency > 1) + filename << "_" << weightEpochDump; + + cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedWeights.Save(filename.str()); + ++weightEpochDump; + } + } else { + if (evaluateModulo(shardPosition, dumping_base, actualBatchSize)) { + cerr << "Rank " << rank << ", epoch " << epoch << ", dump weights.. 
(pos: " << shardPosition << ", base: " << dumping_base << ")" << endl; + ScoreComponentCollection tmpAverageWeights(cumulativeWeights); + bool proceed = false; + if (accumulateWeights) { + if (numberOfUpdates > 0) { + tmpAverageWeights.DivideEquals(numberOfUpdates); + proceed = true; + } + } else { + if (numberOfUpdatesThisEpoch > 0) { + if (sparseNoAverage) // average only core weights + tmpAverageWeights.CoreDivideEquals(numberOfUpdatesThisEpoch); + else if (sparseAverage) + tmpAverageWeights.DivideEquals(cumulativeWeightsBinary); + else + tmpAverageWeights.DivideEquals(numberOfUpdatesThisEpoch); + proceed = true; + } + } + + if (proceed) { +#ifdef MPI_ENABLE + // average across processes + mpi::reduce(world, tmpAverageWeights, mixedAverageWeights, SCCPlus(), 0); + ScoreComponentCollection totalBinary; + if (sparseAverage) { + ScoreComponentCollection binary; + binary.SetToBinaryOf(mosesWeights); + mpi::reduce(world, binary, totalBinary, SCCPlus(), 0); + } +#endif +#ifndef MPI_ENABLE + mixedAverageWeights = tmpAverageWeights; + //FIXME: What do to for non-mpi version + ScoreComponentCollection totalBinary; +#endif + if (rank == 0 && !weightDumpStem.empty()) { + // divide by number of processes + if (sparseNoAverage) + mixedAverageWeights.CoreDivideEquals(size); // average only core weights + else if (sparseAverage) + mixedAverageWeights.DivideEquals(totalBinary); + else + mixedAverageWeights.DivideEquals(size); + + // normalise weights after averaging + if (normaliseWeights) { + mixedAverageWeights.L1Normalise(); + } + + // dump final average weights + ostringstream filename; + if (epoch < 10) { + filename << weightDumpStem << "_0" << epoch; + } else { + filename << weightDumpStem << "_" << epoch; + } + + if (weightDumpFrequency > 1) { + filename << "_" << weightEpochDump; + } + + /*if (accumulateWeights) { + cerr << "\nMixed average weights (cumulative) during epoch " << epoch << ": " << mixedAverageWeights << endl; + } else { + cerr << "\nMixed average weights 
during epoch " << epoch << ": " << mixedAverageWeights << endl; + }*/ + + cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + ++weightEpochDump; + + if (weightEpochDump == weightDumpFrequency) { + if (l1_regularize) { + size_t pruned = mixedAverageWeights.SparseL1Regularize(l1_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l1-reg. on mixedAverageWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl; + + } + if (l2_regularize) { + mixedAverageWeights.SparseL2Regularize(l2_lambda); + cerr << "Rank " << rank << ", epoch " << epoch << ", " + << "l2-reg. on mixedAverageWeights with lambda=" << l2_lambda << endl; + } + + if (l1_regularize || l2_regularize) { + filename << "_reg"; + cerr << "Dumping regularized mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl; + mixedAverageWeights.Save(filename.str()); + } + } + + if (weightEpochDump == weightDumpFrequency && printFeatureCounts) { + // print out all features with counts + stringstream s1, s2; + s1 << "sparse_feature_hope_counts" << "_" << epoch; + s2 << "sparse_feature_fear_counts" << "_" << epoch; + ofstream sparseFeatureCountsHope(s1.str().c_str()); + ofstream sparseFeatureCountsFear(s2.str().c_str()); + + mixedAverageWeights.PrintSparseHopeFeatureCounts(sparseFeatureCountsHope); + mixedAverageWeights.PrintSparseFearFeatureCounts(sparseFeatureCountsFear); + sparseFeatureCountsHope.close(); + sparseFeatureCountsFear.close(); + } + } + } + }// end dumping + } // end if dump + } // end of shard loop, end of this epoch + cerr << "Rank " << rank << ", epoch " << epoch << ", end of epoch.." 
<< endl; + + if (historyBleu || simpleHistoryBleu) { + cerr << "Bleu feature history after epoch " << epoch << endl; + decoder->printBleuFeatureHistory(cerr); + } + // cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl; + + // Check whether there were any weight updates during this epoch + size_t sumUpdates; + size_t *sendbuf_uint, *recvbuf_uint; + sendbuf_uint = (size_t *) malloc(sizeof(size_t)); + recvbuf_uint = (size_t *) malloc(sizeof(size_t)); +#ifdef MPI_ENABLE + sendbuf_uint[0] = numberOfUpdatesThisEpoch; + recvbuf_uint[0] = 0; + MPI_Reduce(sendbuf_uint, recvbuf_uint, 1, MPI_UNSIGNED, MPI_SUM, 0, world); + sumUpdates = recvbuf_uint[0]; +#endif +#ifndef MPI_ENABLE + sumUpdates = numberOfUpdatesThisEpoch; +#endif + if (rank == 0 && sumUpdates == 0) { + cerr << "\nNo weight updates during this epoch.. stopping." << endl; + stop = true; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } + + if (!stop) { + // Test if weights have converged + if (weightConvergence) { + bool reached = true; + if (rank == 0 && (epoch >= 2)) { + ScoreComponentCollection firstDiff, secondDiff; + if (dumpMixedWeights) { + firstDiff = mixedWeights; + firstDiff.MinusEquals(mixedWeightsPrevious); + secondDiff = mixedWeights; + secondDiff.MinusEquals(mixedWeightsBeforePrevious); + } else { + firstDiff = mixedAverageWeights; + firstDiff.MinusEquals(mixedAverageWeightsPrevious); + secondDiff = mixedAverageWeights; + secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious); + } + VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << " (max: " << firstDiff.GetLInfNorm() << ")" << endl); + VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl); + + // check whether stopping criterion has been reached + // (both difference vectors must have all weight changes smaller than min_weight_change) + if 
(firstDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (secondDiff.GetLInfNorm() >= min_weight_change) + reached = false; + if (reached) { + // stop MIRA + stop = true; + cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." << endl; + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "stopping"; + dummy.Save(endfilename.str()); + } + } + + mixedWeightsBeforePrevious = mixedWeightsPrevious; + mixedWeightsPrevious = mixedWeights; + mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious; + mixedAverageWeightsPrevious = mixedAverageWeights; +#ifdef MPI_ENABLE + mpi::broadcast(world, stop, 0); +#endif + } //end if (weightConvergence) + } + } // end of epoch loop + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + if (rank == 0) { + ScoreComponentCollection dummy; + ostringstream endfilename; + endfilename << "finished"; + dummy.Save(endfilename.str()); + } + + delete decoder; + exit(0); +} + +bool loadSentences(const string& filename, vector& sentences) +{ + ifstream in(filename.c_str()); + if (!in) + return false; + string line; + while (getline(in, line)) + sentences.push_back(line); + return true; +} + +bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size) +{ + if (mix_or_dump_base == 0) return 0; + if (actual_batch_size > 1) { + bool mix_or_dump = false; + size_t numberSubtracts = actual_batch_size; + do { + if (shard_position % mix_or_dump_base == 0) { + mix_or_dump = true; + break; + } + --shard_position; + --numberSubtracts; + } while (numberSubtracts > 0); + return mix_or_dump; + } else { + return ((shard_position % mix_or_dump_base) == 0); + } +} + +void printFeatureValues(vector > &featureValues) +{ + for (size_t i = 0; i < featureValues.size(); ++i) { + for (size_t j = 0; j < featureValues[i].size(); ++j) { + cerr << featureValues[i][j] << endl; + } + } + cerr << endl; +} + 
+void deleteTranslations(vector > &translations) +{ + for (size_t i = 0; i < translations.size(); ++i) { + for (size_t j = 0; j < translations[i].size(); ++j) { + delete translations[i][j]; + } + } +} + +void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename, vector &inputSentences, MosesDecoder* decoder, size_t n, float bleuWeight) +{ + if (decode == 1) + cerr << "Rank " << rank << ", decoding dev input set according to hope objective.. " << endl; + else if (decode == 2) + cerr << "Rank " << rank << ", decoding dev input set according to fear objective.. " << endl; + else + cerr << "Rank " << rank << ", decoding dev input set according to normal objective.. " << endl; + + // Create shards according to the number of processes used + vector order; + for (size_t i = 0; i < inputSentences.size(); ++i) + order.push_back(i); + + vector shard; + float shardSize = (float) (order.size()) / size; + size_t shardStart = (size_t) (shardSize * rank); + size_t shardEnd = (size_t) (shardSize * (rank + 1)); + if (rank == size - 1) { + shardEnd = inputSentences.size(); + shardSize = shardEnd - shardStart; + } + VERBOSE(1, "Rank " << rank << ", shard start: " << shardStart << " Shard end: " << shardEnd << endl); + VERBOSE(1, "Rank " << rank << ", shard size: " << shardSize << endl); + shard.resize(shardSize); + copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin()); + + // open files for writing + stringstream fname; + fname << filename << ".rank" << rank; + filename = fname.str(); + ostringstream filename_nbest; + filename_nbest << filename << "." 
<< n << "best"; + ofstream out(filename.c_str()); + ofstream nbest_out((filename_nbest.str()).c_str()); + if (!out) { + ostringstream msg; + msg << "Unable to open " << fname.str(); + throw runtime_error(msg.str()); + } + if (!nbest_out) { + ostringstream msg; + msg << "Unable to open " << filename_nbest; + throw runtime_error(msg.str()); + } + + for (size_t i = 0; i < shard.size(); ++i) { + size_t sid = shard[i]; + string& input = inputSentences[sid]; + + vector > dummyFeatureValues; + vector > dummyBleuScores; + vector > dummyModelScores; + + vector newFeatureValues; + vector newScores; + dummyFeatureValues.push_back(newFeatureValues); + dummyBleuScores.push_back(newScores); + dummyModelScores.push_back(newScores); + + float factor = 0.0; + if (decode == 1) factor = 1.0; + if (decode == 2) factor = -1.0; + cerr << "Rank " << rank << ", translating sentence " << sid << endl; + bool realBleu = false; + vector< vector > nbestOutput = decoder->getNBest(input, sid, n, factor, bleuWeight, dummyFeatureValues[0], + dummyBleuScores[0], dummyModelScores[0], n, realBleu, true, false, rank, 0, ""); + cerr << endl; + decoder->cleanup(StaticData::Instance().IsChart()); + + for (size_t i = 0; i < nbestOutput.size(); ++i) { + vector output = nbestOutput[i]; + stringstream translation; + for (size_t k = 0; k < output.size(); ++k) { + Word* w = const_cast(output[k]); + translation << w->GetString(0); + translation << " "; + } + + if (i == 0) + out << translation.str() << endl; + nbest_out << sid << " ||| " << translation.str() << " ||| " << dummyFeatureValues[0][i] << + " ||| " << dummyModelScores[0][i] << " ||| sBleu=" << dummyBleuScores[0][i] << endl; + } + } + + out.close(); + nbest_out.close(); + cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl; + +#ifdef MPI_ENABLE + MPI_Finalize(); +#endif + + time_t now; + time(&now); + cerr << "Rank " << rank << ", " << ctime(&now); + + delete decoder; + exit(0); +} + +void applyLearningRates(vector > 
&featureValues, float core_r0, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, ScoreComponentCollection featureLearningRates, float sparse_r0) +{ + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + float featureScore; + for (size_t i=0; i > &featureValues, size_t rank, size_t epoch) +{ + string name = sp->GetScoreProducerDescription(); + + // scale down score + for (size_t i=0; i featureScores = featureValues[i][j].GetScoresForProducer(sp); + for (size_t k=0; k >& featureValuesHope, + const vector< vector >& featureValuesFear, + const vector< vector >& dummy1, + const vector< vector >& dummy2, + const vector< vector >& dummy3, + const vector< vector >& dummy4, + float perceptron_learning_rate, + size_t rank, + size_t epoch, + int updatePosition) +{ + cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope[0][0] << endl; + cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear[0][0] << endl; + ScoreComponentCollection featureValueDiff = featureValuesHope[0][0]; + featureValueDiff.MinusEquals(featureValuesFear[0][0]); + cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl; + featureValueDiff.MultiplyEquals(perceptron_learning_rate); + weightUpdate.PlusEquals(featureValueDiff); + cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl; + return 0; +} + +} + diff --git a/mosesdecoder/contrib/mira/mira.xcodeproj/project.pbxproj b/mosesdecoder/contrib/mira/mira.xcodeproj/project.pbxproj new file mode 100644 index 0000000000000000000000000000000000000000..67662f4e0fa05be81eb60350292b411df922d4e9 --- /dev/null +++ b/mosesdecoder/contrib/mira/mira.xcodeproj/project.pbxproj @@ -0,0 +1,401 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 1E141A311243527800123194 /* Perceptron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E141A2F1243527800123194 /* Perceptron.cpp */; }; + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */; }; + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC6391242602F0059001A /* Decoder.cpp */; }; + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC63B1242602F0059001A /* Main.cpp */; }; + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D1124268310059001A /* libmoses-chart.a */; }; + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6CB124268270059001A /* libmoses.a */; }; + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D9124268440059001A /* libOnDiskPt.a */; }; + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* mira.1 */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 1E9DC6CA124268270059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = moses; + }; + 1E9DC6D0124268310059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = "moses-chart"; + }; + 1E9DC6D8124268440059001A /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType 
= 2; + remoteGlobalIDString = D2AAC046055464E500DB518D; + remoteInfo = OnDiskPt; + }; + 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses */; + remoteInfo = moses; + }; + 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* moses-chart */; + remoteInfo = "moses-chart"; + }; + 1EF4E85012440612006233A0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = D2AAC045055464E500DB518D /* OnDiskPt */; + remoteInfo = OnDiskPt; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + 8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 1E141A2F1243527800123194 /* Perceptron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Perceptron.cpp; sourceTree = ""; }; + 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MiraOptimiser.cpp; sourceTree = ""; }; + 1E9DC6391242602F0059001A /* Decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Decoder.cpp; sourceTree = ""; }; + 1E9DC63A1242602F0059001A /* Decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = Decoder.h; sourceTree = ""; }; + 1E9DC63B1242602F0059001A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Main.cpp; sourceTree = ""; }; + 1E9DC63E124260370059001A /* Optimiser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Optimiser.h; sourceTree = ""; }; + 1E9DC6C6124268270059001A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = "moses-chart.xcodeproj"; path = "../moses-chart/moses-chart.xcodeproj"; sourceTree = SOURCE_ROOT; }; + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; + 1E9DC76712426FC60059001A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Main.h; sourceTree = ""; }; + 8DD76F6C0486A84900D96B5E /* mira */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mira; sourceTree = BUILT_PRODUCTS_DIR; }; + C6859E8B029090EE04C91782 /* mira.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mira.1; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */, + 1E9DC6DB124268510059001A /* libmoses.a in Frameworks */, + 1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + 
+/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* mira */ = { + isa = PBXGroup; + children = ( + 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */, + 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */, + 1E9DC6C6124268270059001A /* moses.xcodeproj */, + 08FB7795FE84155DC02AAC07 /* Source */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = mira; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */, + 1E141A2F1243527800123194 /* Perceptron.cpp */, + 1E9DC63E124260370059001A /* Optimiser.h */, + 1E9DC6391242602F0059001A /* Decoder.cpp */, + 1E9DC63A1242602F0059001A /* Decoder.h */, + 1E9DC63B1242602F0059001A /* Main.cpp */, + 1E9DC76712426FC60059001A /* Main.h */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* mira */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6C7124268270059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6CB124268270059001A /* libmoses.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6CD124268310059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D1124268310059001A /* libmoses-chart.a */, + ); + name = Products; + sourceTree = ""; + }; + 1E9DC6D5124268440059001A /* Products */ = { + isa = PBXGroup; + children = ( + 1E9DC6D9124268440059001A /* libOnDiskPt.a */, + ); + name = Products; + sourceTree = ""; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + C6859E8B029090EE04C91782 /* mira.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* mira */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */; + 
buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + 1EF4E84D12440612006233A0 /* PBXTargetDependency */, + 1EF4E84F12440612006233A0 /* PBXTargetDependency */, + 1EF4E85112440612006233A0 /* PBXTargetDependency */, + ); + name = mira; + productInstallPath = "$(HOME)/bin"; + productName = mira; + productReference = 8DD76F6C0486A84900D96B5E /* mira */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* mira */; + projectDirPath = ""; + projectReferences = ( + { + ProductGroup = 1E9DC6CD124268310059001A /* Products */; + ProjectRef = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */; + }, + { + ProductGroup = 1E9DC6C7124268270059001A /* Products */; + ProjectRef = 1E9DC6C6124268270059001A /* moses.xcodeproj */; + }, + { + ProductGroup = 1E9DC6D5124268440059001A /* Products */; + ProjectRef = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */; + }, + ); + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* mira */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXReferenceProxy section */ + 1E9DC6CB124268270059001A /* libmoses.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libmoses.a; + remoteRef = 1E9DC6CA124268270059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 1E9DC6D1124268310059001A /* libmoses-chart.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = "libmoses-chart.a"; + remoteRef = 1E9DC6D0124268310059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; + 
1E9DC6D9124268440059001A /* libOnDiskPt.a */ = { + isa = PBXReferenceProxy; + fileType = archive.ar; + path = libOnDiskPt.a; + remoteRef = 1E9DC6D8124268440059001A /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; +/* End PBXReferenceProxy section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */, + 1E9DC63D1242602F0059001A /* Main.cpp in Sources */, + 1E141A311243527800123194 /* Perceptron.cpp in Sources */, + 1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 1EF4E84D12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = moses; + targetProxy = 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E84F12440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = "moses-chart"; + targetProxy = 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */; + }; + 1EF4E85112440612006233A0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = OnDiskPt; + targetProxy = 1EF4E85012440612006233A0 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Debug; + }; + 
1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + ../irstlm/lib/i386, + ../srilm/lib/macosx, + ); + OTHER_LDFLAGS = ( + "-lboost_program_options", + "-lz", + "-lirstlm", + "-lmisc", + "-ldstruct", + "-loolm", + "-lflm", + "-llattice", + ); + PRODUCT_NAME = mira; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ( + /usr/local/include, + "../moses-chart/src", + ../moses/src, + ../irstlm/include, + ); + PREBINDING = NO; + SDKROOT = macosx10.6; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 
1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff --git a/mosesdecoder/contrib/moses-speedtest/README.md b/mosesdecoder/contrib/moses-speedtest/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cdf3eb6528da8f765c9ec2cc8cb3d3ca615b4ed7 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/README.md @@ -0,0 +1,146 @@ +# Moses speedtesting framework + +### Description + +This is an automatic test framework that is designed to test the day to day performance changes in Moses. + +### Set up + +#### Set up a Moses repo +Set up a Moses repo and build it with the desired configuration. +```bash +git clone https://github.com/moses-smt/mosesdecoder.git +cd mosesdecoder +./bjam -j10 --with-cmph=/usr/include/ +``` +You need to build Moses first, so that the testsuite knows what command you want it to use when rebuilding against newer revisions. + +#### Create a parent directory. +Create a parent directory where the **runtests.py** and related scripts and configuration file should reside. +This should also be the location of the TEST_DIR and TEST_LOG_DIR as explained in the next section. + +#### Set up a global configuration file. +You need a configuration file for the testsuite. A sample configuration file is provided in **testsuite\_config** +
+MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
+DROP_CACHES_COMM: sys_drop_caches 3
+TEST_DIR: /home/moses-speedtest/phrase_tables/tests
+TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
+BASEBRANCH: RELEASE-2.1.1
+MOSES_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-prof
+MOSES_GOOGLE_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-gperftools
+
+ +The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses. +The _DROP\_CACHES\_COMM_ is the command that would be used to drop caches. It should run without needing root access. +_TEST\_DIR_ is the directory where all the tests will reside. +_TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time. +_BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release. +_MOSES\_PROFILER\_REPO_ is a path to a moses repository set up and built with profiling enabled. Optional if you want to produce profiling results. +_MOSES\_GOOGLE\_PROFILER\_REPO is a path to moses repository set up with full tcmalloc and profiler, as well as shared link for use with gperftools. +### Creating tests + +In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test. +Inside that folder one should place a configuration file named **config**. The naming is mandatory. +An example such configuration file is **test\_config** + +
+Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
+LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/, 
+Variants: vanilla, cached, ldpre, profile, google-profiler #Can't have cached without ldpre or vanilla
+
+ +The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths. +The _LDPRE:_ specifies if tests should be run with any LD\_PRELOAD flags. +The _Variants:_ line specifies what type of tests should we run. This particular line will run the following tests: +1. A Vanilla test meaning just the command after _Command_ will be issued. +2. A vanilla cached test meaning that after the vanilla test, the test will be run again without dropping caches in order to benchmark performance on cached filesystem. +3. A test with LD_PRELOAD ldpreloads moses -f command. For each available LDPRELOAD comma separated library to preload. +4. A cached version of all LD_PRELOAD tests. +5. A profile variant is only available if you have setup the profiler repository. It produces gprof outputs for all of the above in a subdirectory inside the _TEST\_LOG\_DIR. + +#### Produce profiler results. +If you want to produce profiler results together in some tests you need to specify the _MOSES\_PROFILER\_REPO_ in the config +```bash +git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-profile +cd mosesdecoder-profile +./bjam -j10 --with-cmph=/usr/include/ variant=profile +``` + +Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **gprof** output from every run (files ending in **\_profile**). + +#### Produce google profiler results. 
If you want to produce google profiler results for some of the tests, you need to specify the _MOSES\_GOOGLE\_PROFILER\_REPO_ in the config
+moses-speedtest@crom:~/phrase_tables$ python3 runtests.py --help
+usage: runtests.py [-h] -c CONFIGFILE [-s SINGLETESTDIR] [-r REVISION]
+                   [-b BRANCH]
+
+A python based speedtest suite for moses.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -c CONFIGFILE, --configfile CONFIGFILE
+                        Specify test config file
+  -s SINGLETESTDIR, --singletest SINGLETESTDIR
+                        Single test name directory. Specify directory name,
+                        not full path!
+  -r REVISION, --revision REVISION
+                        Specify a specific revison for the test.
+  -b BRANCH, --branch BRANCH
+                        Specify a branch for the test.
+
+ +### Generating HTML report. +To generate a summary of the test results use the **html\_gen.py** script. It places a file named *index.html* in the current script directory. +```bash +python3 html_gen.py testsuite_config +``` +You should use the generated file with the **style.css** file provided in the html directory. + +### Command line regression testing. +Alternatively you could check for regressions from the command line using the **check\_fo\r_regression.py** script: +```bash +python3 check_for_regression.py TESTLOGS_DIRECTORY +``` + +Alternatively the results of all tests are logged inside the the specified TESTLOGS directory so you can manually check them for additional information such as date, time, revision, branch, etc... + +### Create a cron job: +Create a cron job to run the tests daily and generate an html report. An example *cronjob* is available. +```bash +#!/bin/sh +cd /home/moses-speedtest/phrase_tables + +python3 runtests.py -c testsuite_config #Run the tests. +python3 html_gen.py testsuite_config #Generate html + +cp index.html /fs/thor4/html/www/speed-test/ #Update the html +``` + +Place the script in _/etc/cron.daily_ for dayly testing + +###### Author +Nikolay Bogoychev, 2014 + +###### License +This software is licensed under the LGPL. \ No newline at end of file diff --git a/mosesdecoder/contrib/moses-speedtest/check_for_regression.py b/mosesdecoder/contrib/moses-speedtest/check_for_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..1e269c0c671cb7d1a1134a954062a3c56a92d537 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/check_for_regression.py @@ -0,0 +1,63 @@ +"""Checks if any of the latests tests has performed considerably different than + the previous ones. 
Takes the log directory as an argument.""" +import os +import sys +from testsuite_common import Result, processLogLine, bcolors, getLastTwoLines + +LOGDIR = sys.argv[1] #Get the log directory as an argument +PERCENTAGE = 5 #Default value for how much a test shoudl change +if len(sys.argv) == 3: + PERCENTAGE = float(sys.argv[2]) #Default is 5%, but we can specify more + #line parameter + +def printResults(regressed, better, unchanged, firsttime): + """Pretty print the results in different colours""" + if regressed != []: + for item in regressed: + print(bcolors.RED + "REGRESSION! " + item.testname + " Was: "\ + + str(item.previous) + " Is: " + str(item.current) + " Change: "\ + + str(abs(item.percentage)) + "%. Revision: " + item.revision\ + + bcolors.ENDC) + print('\n') + if unchanged != []: + for item in unchanged: + print(bcolors.BLUE + "UNCHANGED: " + item.testname + " Revision: " +\ + item.revision + bcolors.ENDC) + print('\n') + if better != []: + for item in better: + print(bcolors.GREEN + "IMPROVEMENT! " + item.testname + " Was: "\ + + str(item.previous) + " Is: " + str(item.current) + " Change: "\ + + str(abs(item.percentage)) + "%. Revision: " + item.revision\ + + bcolors.ENDC) + if firsttime != []: + for item in firsttime: + print(bcolors.PURPLE + "First time test! " + item.testname +\ + " Took: " + str(item.real) + " seconds. Revision: " +\ + item.revision + bcolors.ENDC) + + +all_files = os.listdir(LOGDIR) +regressed = [] +better = [] +unchanged = [] +firsttime = [] + +#Go through all log files and find which tests have performed better. 
+for logfile in all_files: + (line1, line2) = getLastTwoLines(logfile, LOGDIR) + log1 = processLogLine(line1) + if line2 == '\n': # Empty line, only one test ever run + firsttime.append(log1) + continue + log2 = processLogLine(line2) + res = Result(log1.testname, log1.real, log2.real, log2.revision,\ + log2.branch, log1.revision, log1.branch) + if res.percentage < -PERCENTAGE: + regressed.append(res) + elif res.change > PERCENTAGE: + better.append(res) + else: + unchanged.append(res) + +printResults(regressed, better, unchanged, firsttime) diff --git a/mosesdecoder/contrib/moses-speedtest/cronjob b/mosesdecoder/contrib/moses-speedtest/cronjob new file mode 100644 index 0000000000000000000000000000000000000000..4f7183a4802563b977df3401f06e7da245d51e46 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/cronjob @@ -0,0 +1,7 @@ +#!/bin/sh +cd /home/moses-speedtest/phrase_tables + +python3 runtests.py -c testsuite_config #Run the tests. +python3 html_gen.py testsuite_config #Generate html + +cp index.html /fs/thor4/html/www/speed-test/ #Update the html \ No newline at end of file diff --git a/mosesdecoder/contrib/moses-speedtest/runtests.py b/mosesdecoder/contrib/moses-speedtest/runtests.py new file mode 100644 index 0000000000000000000000000000000000000000..19d601d42709bd1205b6d60b3dc667f3254c6949 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/runtests.py @@ -0,0 +1,439 @@ +"""Given a config file, runs tests""" +import os +import subprocess +import time +import shutil +from argparse import ArgumentParser +from testsuite_common import processLogLine + +def parse_cmd(): + """Parse the command line arguments""" + description = "A python based speedtest suite for moses." 
+ parser = ArgumentParser(description=description) + parser.add_argument("-c", "--configfile", action="store",\ + dest="configfile", required=True,\ + help="Specify test config file") + parser.add_argument("-s", "--singletest", action="store",\ + dest="singletestdir", default=None,\ + help="Single test name directory. Specify directory name,\ + not full path!") + parser.add_argument("-r", "--revision", action="store",\ + dest="revision", default=None,\ + help="Specify a specific revison for the test.") + parser.add_argument("-b", "--branch", action="store",\ + dest="branch", default=None,\ + help="Specify a branch for the test.") + + arguments = parser.parse_args() + return arguments + +def repoinit(testconfig, profiler=None): + """Determines revision and sets up the repo. If given the profiler optional + argument, wil init the profiler repo instead of the default one.""" + revision = '' + #Update the repo + if profiler == "gnu-profiler": + if testconfig.repo_prof is not None: + os.chdir(testconfig.repo_prof) + else: + raise ValueError('Profiling repo is not defined') + elif profiler == "google-profiler": + if testconfig.repo_gprof is not None: + os.chdir(testconfig.repo_gprof) + else: + raise ValueError('Profiling repo is not defined') + else: + os.chdir(testconfig.repo) + #Checkout specific branch, else maintain main branch + if testconfig.branch != 'master': + subprocess.call(['git', 'checkout', testconfig.branch]) + rev, _ = subprocess.Popen(['git', 'rev-parse', 'HEAD'],\ + stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() + revision = str(rev).replace("\\n'", '').replace("b'", '') + else: + subprocess.call(['git checkout master'], shell=True) + + #Check a specific revision. Else checkout master. 
+ if testconfig.revision: + subprocess.call(['git', 'checkout', testconfig.revision]) + revision = testconfig.revision + elif testconfig.branch == 'master': + subprocess.call(['git pull'], shell=True) + rev, _ = subprocess.Popen(['git rev-parse HEAD'], stdout=subprocess.PIPE,\ + stderr=subprocess.PIPE, shell=True).communicate() + revision = str(rev).replace("\\n'", '').replace("b'", '') + + return revision + +class Configuration: + """A simple class to hold all of the configuration constatns""" + def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None, repo_gprof=None): + self.repo = repo + self.repo_prof = repo_prof + self.repo_gprof = repo_gprof + self.drop_caches = drop_caches + self.tests = tests + self.testlogs = testlogs + self.basebranch = basebranch + self.baserev = baserev + self.singletest = None + self.revision = None + self.branch = 'master' # Default branch + + def additional_args(self, singletest, revision, branch): + """Additional configuration from command line arguments""" + self.singletest = singletest + if revision is not None: + self.revision = revision + if branch is not None: + self.branch = branch + + def set_revision(self, revision): + """Sets the current revision that is being tested""" + self.revision = revision + + +class Test: + """A simple class to contain all information about tests""" + def __init__(self, name, command, ldopts, permutations, prof_command=None, gprof_command=None): + self.name = name + self.command = command + self.prof_command = prof_command + self.gprof_command = gprof_command + self.ldopts = ldopts.replace(' ', '').split(',') #Not tested yet + self.permutations = permutations + +def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None, moses_gprof_repo=None): + """Parses the config file""" + command, ldopts, prof_command, gprof_command = '', '', None, None + permutations = [] + fileopen = open(conffile, 'r') + for line in fileopen: + line = line.split('#')[0] # 
Discard comments + if line == '' or line == '\n': + continue # Discard lines with comments only and empty lines + opt, args = line.split(' ', 1) # Get arguments + + if opt == 'Command:': + command = args.replace('\n', '') + if moses_prof_repo is not None: # Get optional command for profiling + prof_command = moses_prof_repo + '/bin/' + command + if moses_gprof_repo is not None: # Get optional command for google-perftools + gprof_command = moses_gprof_repo + '/bin/' + command + command = moses_repo + '/bin/' + command + elif opt == 'LDPRE:': + ldopts = args.replace('\n', '') + elif opt == 'Variants:': + permutations = args.replace('\n', '').replace(' ', '').split(',') + else: + raise ValueError('Unrecognized option ' + opt) + #We use the testdir as the name. + testcase = Test(testdir, command, ldopts, permutations, prof_command, gprof_command) + fileopen.close() + return testcase + +def parse_testconfig(conffile): + """Parses the config file for the whole testsuite.""" + repo_path, drop_caches, tests_dir, testlog_dir = '', '', '', '' + basebranch, baserev, repo_prof_path, repo_gprof_path = '', '', None, None + fileopen = open(conffile, 'r') + for line in fileopen: + line = line.split('#')[0] # Discard comments + if line == '' or line == '\n': + continue # Discard lines with comments only and empty lines + opt, args = line.split(' ', 1) # Get arguments + if opt == 'MOSES_REPO_PATH:': + repo_path = args.replace('\n', '') + elif opt == 'DROP_CACHES_COMM:': + drop_caches = args.replace('\n', '') + elif opt == 'TEST_DIR:': + tests_dir = args.replace('\n', '') + elif opt == 'TEST_LOG_DIR:': + testlog_dir = args.replace('\n', '') + elif opt == 'BASEBRANCH:': + basebranch = args.replace('\n', '') + elif opt == 'BASEREV:': + baserev = args.replace('\n', '') + elif opt == 'MOSES_PROFILER_REPO:': # Optional + repo_prof_path = args.replace('\n', '') + elif opt == 'MOSES_GOOGLE_PROFILER_REPO:': # Optional + repo_gprof_path = args.replace('\n', '') + else: + raise 
ValueError('Unrecognized option ' + opt) + config = Configuration(repo_path, drop_caches, tests_dir, testlog_dir,\ + basebranch, baserev, repo_prof_path, repo_gprof_path) + fileopen.close() + return config + +def get_config(): + """Builds the config object with all necessary attributes""" + args = parse_cmd() + config = parse_testconfig(args.configfile) + config.additional_args(args.singletestdir, args.revision, args.branch) + revision = repoinit(config) + if config.repo_prof is not None: + repoinit(config, "gnu-profiler") + if config.repo_gprof is not None: + repoinit(config, "google-profiler") + config.set_revision(revision) + return config + +def check_for_basever(testlogfile, basebranch): + """Checks if the base revision is present in the testlogs""" + filetoopen = open(testlogfile, 'r') + for line in filetoopen: + templine = processLogLine(line) + if templine.branch == basebranch: + return True + return False + +def split_time(filename): + """Splits the output of the time function into seperate parts. + We will write time to file, because many programs output to + stderr which makes it difficult to get only the exact results we need.""" + timefile = open(filename, 'r') + realtime = float(timefile.readline().replace('\n', '').split()[1]) + usertime = float(timefile.readline().replace('\n', '').split()[1]) + systime = float(timefile.readline().replace('\n', '').split()[1]) + timefile.close() + + return (realtime, usertime, systime) + + +def write_log(time_file, logname, config): + """Writes to a logfile""" + log_write = open(config.testlogs + '/' + logname, 'a') # Open logfile + date_run = time.strftime("%d.%m.%Y %H:%M:%S") # Get the time of the test + realtime, usertime, systime = split_time(time_file) # Get the times in a nice form + + # Append everything to a log file. 
+ writestr = date_run + " " + config.revision + " Testname: " + logname +\ + " RealTime: " + str(realtime) + " UserTime: " + str(usertime) +\ + " SystemTime: " + str(systime) + " Branch: " + config.branch +'\n' + log_write.write(writestr) + log_write.close() + +def write_gprof(command, name, variant, config): + """Produces a gprof report from a gmon file""" + #Check if we have a directory for the profiling of this testcase: + output_dir = config.testlogs + '/' + name + if not os.path.exists(output_dir): + os.makedirs(output_dir) + outputfile = output_dir + '/' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant + + #Compile a gprof command and output the file in the directory we just created + gmon_path = os.getcwd() + '/gmon.out' # Path to the profiling file + executable_path = command.split(' ')[0] # Path to the moses binary + gprof_command = 'gprof ' + executable_path + ' ' + gmon_path + ' > ' + outputfile + subprocess.call([gprof_command], shell=True) + os.remove(gmon_path) # After we are done discard the gmon file + +def write_pprof(name, variant, config): + """Copies the google-perftools profiler output to the corresponding test directory""" + output_dir = config.testlogs + '/' + name + if not os.path.exists(output_dir): + os.makedirs(output_dir) + outputfile = output_dir + '/pprof_' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant + shutil.move("/tmp/moses.prof", outputfile) + + +def execute_test(command, path, name, variant, config, profile=None): + """Executes a testcase given a whole command, path to the test file output, + name of the test and variant tested. 
Config is the global configuration""" + subprocess.Popen([command], stdout=None, stderr=subprocess.PIPE, shell=True).communicate() + if profile is None: + write_log(path, name + '_' + variant, config) + elif profile == "gnu-profiler": # Basically produce a gmon output + write_gprof(command, name, variant, config) + elif profile == "google-profiler": + write_pprof(name, variant, config) + + +def execute_tests(testcase, cur_directory, config): + """Executes timed tests based on the config file""" + #Several global commands related to the time wrapper + time_command = ' time -p -o /tmp/time_moses_tests ' + time_path = '/tmp/time_moses_tests' + + #Figure out the order of which tests must be executed. + #Change to the current test directory + os.chdir(config.tests + '/' + cur_directory) + #Clear caches + subprocess.call(['sync'], shell=True) + subprocess.call([config.drop_caches], shell=True) + #Perform vanilla test and if a cached test exists - as well + print(testcase.name) + if 'vanilla' in testcase.permutations: + #Create the command for executing moses + whole_command = time_command + testcase.command + + #test normal and cached + execute_test(whole_command, time_path, testcase.name, 'vanilla', config) + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config) + + #Now perform LD_PRELOAD tests + if 'ldpre' in testcase.permutations: + for opt in testcase.ldopts: + #Clear caches + subprocess.call(['sync'], shell=True) + subprocess.call([config.drop_caches], shell=True) + + #Create the command for executing moses: + whole_command = 'LD_PRELOAD=' + opt + time_command + testcase.command + variant = 'ldpre_' + opt + + #test normal and cached + execute_test(whole_command, time_path, testcase.name, variant, config) + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, variant + '_cached', config) + + #Perform profiling test. 
Mostly same as the above lines but necessary duplication. + #All actual code is inside execute_test so those lines shouldn't need modifying + if 'profile' in testcase.permutations: + subprocess.call(['sync'], shell=True) # Drop caches first + subprocess.call([config.drop_caches], shell=True) + + if 'vanilla' in testcase.permutations: + whole_command = testcase.prof_command + execute_test(whole_command, time_path, testcase.name, 'profile', config, "gnu-profiler") + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, "gnu-profiler") + + if 'ldpre' in testcase.permutations: + for opt in testcase.ldopts: + #Clear caches + subprocess.call(['sync'], shell=True) + subprocess.call([config.drop_caches], shell=True) + + #Create the command for executing moses: + whole_command = 'LD_PRELOAD=' + opt + " " + testcase.prof_command + variant = 'profile_ldpre_' + opt + + #test normal and cached + execute_test(whole_command, time_path, testcase.name, variant, config, "gnu-profiler") + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, "gnu-profiler") + + #Google-perftools profiler + if 'google-profiler' in testcase.permutations: + subprocess.call(['sync'], shell=True) # Drop caches first + subprocess.call([config.drop_caches], shell=True) + + #Create the command for executing moses + whole_command = "CPUPROFILE=/tmp/moses.prof " + testcase.gprof_command + + #test normal and cached + execute_test(whole_command, time_path, testcase.name, 'vanilla', config, 'google-profiler') + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config, 'google-profiler') + + #Now perform LD_PRELOAD tests + if 'ldpre' in testcase.permutations: + for opt in testcase.ldopts: + #Clear caches + subprocess.call(['sync'], shell=True) + subprocess.call([config.drop_caches], shell=True) + + #Create the command 
for executing moses: + whole_command = 'LD_PRELOAD=' + opt + " " + whole_command + variant = 'ldpre_' + opt + + #test normal and cached + execute_test(whole_command, time_path, testcase.name, variant, config, 'google-profiler') + if 'cached' in testcase.permutations: + execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, 'google-profiler') + + +# Go through all the test directories and executes tests +if __name__ == '__main__': + CONFIG = get_config() + ALL_DIR = os.listdir(CONFIG.tests) + + #We should first check if any of the tests is run for the first time. + #If some of them are run for the first time we should first get their + #time with the base version (usually the previous release) + FIRSTTIME = [] + TESTLOGS = [] + #Strip filenames of test underscores + for listline in os.listdir(CONFIG.testlogs): + listline = listline.replace('_vanilla', '') + listline = listline.replace('_cached', '') + listline = listline.replace('_ldpre', '') + TESTLOGS.append(listline) + for directory in ALL_DIR: + if directory not in TESTLOGS: + FIRSTTIME.append(directory) + + #Sometimes even though we have the log files, we will need to rerun them + #Against a base version, because we require a different baseversion (for + #example when a new version of Moses is released.) Therefore we should + #Check if the version of Moses that we have as a base version is in all + #of the log files. 
+ + for logfile in os.listdir(CONFIG.testlogs): + logfile_name = CONFIG.testlogs + '/' + logfile + if os.path.isfile(logfile_name) and not check_for_basever(logfile_name, CONFIG.basebranch): + logfile = logfile.replace('_vanilla', '') + logfile = logfile.replace('_cached', '') + logfile = logfile.replace('_ldpre', '') + FIRSTTIME.append(logfile) + FIRSTTIME = list(set(FIRSTTIME)) #Deduplicate + + if FIRSTTIME != []: + #Create a new configuration for base version tests: + BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\ + CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\ + CONFIG.baserev, CONFIG.repo_prof, CONFIG.repo_gprof) + BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch) + #Set up the repository and get its revision: + REVISION = repoinit(BASECONFIG) + BASECONFIG.set_revision(REVISION) + #Build + os.chdir(BASECONFIG.repo) + subprocess.call(['./previous.sh'], shell=True) + #If profiler configuration exists also init it + if BASECONFIG.repo_prof is not None: + repoinit(BASECONFIG, "gnu-profiler") + os.chdir(BASECONFIG.repo_prof) + subprocess.call(['./previous.sh'], shell=True) + + if BASECONFIG.repo_gprof is not None: + repoinit(BASECONFIG, "google-profiler") + os.chdir(BASECONFIG.repo_gprof) + subprocess.call(['./previous.sh'], shell=True) + + #Perform tests + for directory in FIRSTTIME: + cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\ + '/config', directory, BASECONFIG.repo, BASECONFIG.repo_prof, BASECONFIG.repo_gprof) + execute_tests(cur_testcase, directory, BASECONFIG) + + #Reset back the repository to the normal configuration + repoinit(CONFIG) + if BASECONFIG.repo_prof is not None: + repoinit(CONFIG, "gnu-profiler") + + if BASECONFIG.repo_gprof is not None: + repoinit(CONFIG, "google-profiler") + + #Builds moses + os.chdir(CONFIG.repo) + subprocess.call(['./previous.sh'], shell=True) + if CONFIG.repo_prof is not None: + os.chdir(CONFIG.repo_prof) + subprocess.call(['./previous.sh'], shell=True) + + if 
CONFIG.repo_gprof is not None: + os.chdir(CONFIG.repo_gprof) + subprocess.call(['./previous.sh'], shell=True) + + if CONFIG.singletest: + TESTCASE = parse_configfile(CONFIG.tests + '/' +\ + CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof) + execute_tests(TESTCASE, CONFIG.singletest, CONFIG) + else: + for directory in ALL_DIR: + cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\ + '/config', directory, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof) + execute_tests(cur_testcase, directory, CONFIG) diff --git a/mosesdecoder/contrib/moses-speedtest/sys_drop_caches.py b/mosesdecoder/contrib/moses-speedtest/sys_drop_caches.py new file mode 100644 index 0000000000000000000000000000000000000000..d4796e0905b99a9a0aeaf6a828e11c16d4ceb395 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/sys_drop_caches.py @@ -0,0 +1,22 @@ +#!/usr/bin/spython +from sys import argv, stderr, exit +from os import linesep as ls +procfile = "/proc/sys/vm/drop_caches" +options = ["1","2","3"] +flush_type = None +try: + flush_type = argv[1][0:1] + if not flush_type in options: + raise IndexError, "not in options" + with open(procfile, "w") as f: + f.write("%s%s" % (flush_type,ls)) + exit(0) +except IndexError, e: + stderr.write("Argument %s required.%s" % (options, ls)) +except IOError, e: + stderr.write("Error writing to file.%s" % ls) +except StandardError, e: + stderr.write("Unknown Error.%s" % ls) + +exit(1) + diff --git a/mosesdecoder/contrib/moses-speedtest/test_config b/mosesdecoder/contrib/moses-speedtest/test_config new file mode 100644 index 0000000000000000000000000000000000000000..4a480f49634547be7f8f3f40c03c13e8741a6719 --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/test_config @@ -0,0 +1,3 @@ +Command: moses -f ... 
-i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config +LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/, +Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla diff --git a/mosesdecoder/contrib/moses-speedtest/testsuite_config b/mosesdecoder/contrib/moses-speedtest/testsuite_config new file mode 100644 index 0000000000000000000000000000000000000000..b6ad6181c51f7b0b1dda30e035c319f97d77adfe --- /dev/null +++ b/mosesdecoder/contrib/moses-speedtest/testsuite_config @@ -0,0 +1,5 @@ +MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder +DROP_CACHES_COMM: sys_drop_caches 3 +TEST_DIR: /home/moses-speedtest/phrase_tables/tests +TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs +BASEBRANCH: RELEASE-2.1.1 \ No newline at end of file diff --git a/mosesdecoder/contrib/picaro/README b/mosesdecoder/contrib/picaro/README new file mode 100644 index 0000000000000000000000000000000000000000..f21a2c09875a6d36051a3e094018fd9499816b39 --- /dev/null +++ b/mosesdecoder/contrib/picaro/README @@ -0,0 +1,62 @@ +README - 16 Jan 2011b +Author: Jason Riesa + +Picaro [v1.0]: A simple command-line alignment visualization tool. +Visualize alignments in grid-format. + +This brief README is organized as follows: +I. REQUIREMENTS +II. USAGE +III. INPUT FORMAT +IV. EXAMPLE USAGE +V. NOTES + +I. REQUIREMENTS +=============== +Python v2.5 or higher is required. + +II. USAGE +========= +Picaro takes as input 3 mandatory arguments and up to 2 optional arguments: +Mandatory arguments: +1. -a1 where alignment1 is a path to an alignment file +2. -e where e is a path to a file of English sentences +3. -f where f is a path to a file of French sentences +Optional arguments: +1. -a2 path to alignment2 file in f-e format +2. -maxlen for each sentence pair, render only when each + sentence has length in words <= len + +For historical reasons we use the labels e, f, English, and French, +but any language pair will do. 
+ +III. INPUT FORMAT +================= +- Files e and f must be sentence-aligned +- Alignment files must be in f-e format +See included sample files in zh/ and es/. + +IV. EXAMPLE USAGE +================= +WITH A SINGLE ALIGNMENT: +$ picaro.py -e zh/sample.e -f zh/sample.f -a1 zh/sample.aln + +COMPARING TWO ALIGNMENTS: +$ picaro.py -e zh/sample.e -f zh/sample.f -a1 zh/alternate.aln -a2 zh/sample.aln + +When visualizing two alignments at once, refer to the following color scheme: +Green blocks: alignments a1 and a2 agree +Blue blocks: alignment a1 only +Gold blocks: alignment a2 only + +V. NOTES +======== +RIGHT-TO-LEFT TEXT: +If you are using right-to-left text, e.g. Arabic, transliterate your text first. +Terminals generally render unexpectedly with mixed left-to-right and right-to-left text. +For Arabic, in particular, we use the Buckwalter translitation scheme [1] when using this tool. +The following Perl module implements Buckwalter transliteration: +http://search.cpan.org/~smrz/Encode-Arabic-1.8/lib/Encode/Arabic.pm + +[1] http://www.ldc.upenn.edu/myl/morph/buckwalter.html + diff --git a/mosesdecoder/contrib/picaro/es/README b/mosesdecoder/contrib/picaro/es/README new file mode 100644 index 0000000000000000000000000000000000000000..f686bdc1dcb446d09b9325f19388bf641547af8b --- /dev/null +++ b/mosesdecoder/contrib/picaro/es/README @@ -0,0 +1,4 @@ +Spanish-English sample +sample.f Spanish text +sample.e English text +sample.a Alignment file with links in f-e format diff --git a/mosesdecoder/contrib/picaro/es/sample.aln b/mosesdecoder/contrib/picaro/es/sample.aln new file mode 100644 index 0000000000000000000000000000000000000000..4e27a62fd95b094c6a33bc083eac41ce9578fb9a --- /dev/null +++ b/mosesdecoder/contrib/picaro/es/sample.aln @@ -0,0 +1 @@ +0-0 0-1 1-2 1-3 2-4 3-5 4-6 5-7 diff --git a/mosesdecoder/contrib/picaro/es/sample.e b/mosesdecoder/contrib/picaro/es/sample.e new file mode 100644 index 
0000000000000000000000000000000000000000..8a15de71093f30c6531bd6cfcf100c02fd14296c --- /dev/null +++ b/mosesdecoder/contrib/picaro/es/sample.e @@ -0,0 +1 @@ +i want to go to spain tomorrow . diff --git a/mosesdecoder/contrib/picaro/es/sample.f b/mosesdecoder/contrib/picaro/es/sample.f new file mode 100644 index 0000000000000000000000000000000000000000..86213a2550b075a8147f4f3eefb59726a213d045 --- /dev/null +++ b/mosesdecoder/contrib/picaro/es/sample.f @@ -0,0 +1 @@ +quiero ir a españa mañana . diff --git a/mosesdecoder/contrib/picaro/picaro.py b/mosesdecoder/contrib/picaro/picaro.py new file mode 100644 index 0000000000000000000000000000000000000000..3eb94a36c0a7ebbcc150b26edaffb655773cbfb3 --- /dev/null +++ b/mosesdecoder/contrib/picaro/picaro.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python +# +# Picaro: An simple command-line alignment visualization tool. +# +# picaro.py +# Visualize alignments between sentences in a grid format. +# +# Jason Riesa +# version: 01-16-2010 +# +# Copyright (C) 2013 Jason Riesa +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +import sys, os, commands +from collections import defaultdict + +#TC_BIN = "tc/tc.linux32" + +a1_file_str = "" +a2_file_str = "" +f_file_str = "" +e_file_str = "" +SHOW_TC_A1 = 0 +SHOW_TC_A2 = 0 +maxlen = float('inf') + +# Process command line options +try: + while len(sys.argv) > 1: + option = sys.argv[1]; del sys.argv[1] + if option == '-a1': + a1_file_str = sys.argv[1]; del sys.argv[1] + elif option == '-a2': + a2_file_str = sys.argv[1]; del sys.argv[1] + elif option == '-f': + f_file_str = sys.argv[1]; del sys.argv[1] + elif option == '-e': + e_file_str = sys.argv[1]; del sys.argv[1] + elif option == '-maxlen': + maxlen = int(sys.argv[1]); del sys.argv[1] + else: + sys.stderr.write("Invalid option: %s\n" % (option)) + sys.exit(1) + ''' + elif option == '-tc': + if sys.argv[1] == '1': + SHOW_TC_A1 = 1; del sys.argv[1] + elif sys.argv[1] == '2': + SHOW_TC_A2 = 2; del sys.argv[1] + else: + raise Exception, "Invalid argument to option -tc" + ''' + + if a1_file_str == "" or f_file_str == "" or e_file_str == "": + raise Exception, "Not all options properly specified." + # Make sure transitive closure binary exists if user has enabled this option + if SHOW_TC_A1 or SHOW_TC_A2: + if not os.path.exists(TC_BIN): + raise Exception, "Transitive closure binary "+TC_BIN+" not found." 
+except Exception, msg: + sys.stderr.write("%s: %s\n" % (sys.argv[0], msg)) + sys.stderr.write("Usage: %s: -a1 -f -e [-a2 ]\n" % (sys.argv[0])) + sys.stderr.write("Mandatory arguments:\n") + sys.stderr.write(" -a1 \t path to alignment 1 file in f-e format\n") + sys.stderr.write(" -f \t\t path to source text f\n") + sys.stderr.write(" -e \t\t path to target text e\n") + sys.stderr.write("Optional arguments:\n") + sys.stderr.write(" -a2 \t path to alignment 2 file in f-e format\n") + sys.stderr.write(" -maxlen \t display alignment only when e and f have length <= len\n") + sys.exit(1) + + +a_file = open(a1_file_str, 'r') +f_file = open(f_file_str, 'r') +e_file = open(e_file_str, 'r') +if a2_file_str != "": + a2_file = open(a2_file_str, 'r') + +sentenceNumber = 0 +nextRequested = 1 +for aline in a_file: + eline = e_file.readline() + fline = f_file.readline() + if a2_file_str != "": + a2line = a2_file.readline() + + links = aline.split() + e_words = eline.split() + f_words = fline.split() + if a2_file_str != "": + links2 = a2line.split() + + # Get transitive closure of links and links2 + if SHOW_TC_A1: + cmd = 'echo "' + ' '.join(links) + '" | ' + TC_BIN + failure1, output1 = commands.getstatusoutput(cmd) + tc1 = output1.split() + if SHOW_TC_A2: + cmd = 'echo "' + ' '.join(links2) + '" | ' + TC_BIN + failure2, output2 = commands.getstatusoutput(cmd) + tc2 = output2.split() + + # Update tracking counts + sentenceNumber += 1 + if sentenceNumber < nextRequested: + continue + + # Don't generate alignment grids for very large sentences + if len(e_words) > maxlen or len(f_words) > maxlen: + continue + + + print "== SENTENCE ",sentenceNumber," ==" + + # Initialize alignment objects + # a holds alignments of user-specified -a1 + # a2 holds alignments of user-specified -a2 + a = defaultdict(lambda: defaultdict(int)) + a2 = defaultdict(lambda: defaultdict(int)) + + # Print e_words on the columns + # First, find the length of the longest word + longestEWordSize = 0 + longestEWord 
= 0 + for w in e_words: + if len(w) > longestEWordSize: + longestEWordSize = len(w) + longestEWord = w + + # Now, print the e-words + for i in range(longestEWordSize, 0, -1): + for w in e_words: + if len(w) < i: + print " ", + else: + print w[(i*-1)], + print + + + # Fill in alignment matrix 1 + for link in links: + i, j = map(int, link.split('-')) + a[int(i)][int(j)] = 1 + # Fill in extra links added by transitive closure + if SHOW_TC_A1: + for link in tc1: + i, j = map(int, link.split('-')) + if(a[i][j] != 1): + a[i][j] = 2 + + # Fill in alignment matrix 2 + if(a2_file_str != ""): + for link in links2: + i, j = map(int, link.split('-')) + a2[i][j] = 1 + # Fill in extra links added by transitive closure + if SHOW_TC_A2: + for link in tc2: + i, j = map(int, link.split('-')) + if(a2[i][j] != 1): + a2[i][j] = 2 + + # Print filled-in alignment matrix + if a2_file_str == "": + for i, _ in enumerate(f_words): + for j, _ in enumerate(e_words): + val1 = a[i][j] + if val1 == 0: + # No link + print ':', + elif val1 == 1: + # Regular link + print u'\u001b[44m\u0020\u001b[0m', + elif val1 == 2: + # Link due to transitive closure + # Render as gray-shaded square + print 'O', + print f_words[i] + print + else: + for i, _ in enumerate(f_words): + for j, _ in enumerate(e_words): + val1 = a[i][j] + val2 = a2[i][j] + + if val1 == 0 and val2 == 0: + # Link not in a nor a2 + # Empty grid box + print ':', + # Link in both a and a2 + elif val1 > 0 and val2 > 0: + # Green box + if val1 == 1: + if val2 == 1: + print u'\u001b[42m\u001b[1m\u0020\u001b[0m', + elif val2 == 2: + print u'\u001b[42m\u001b[30m2\u001b[0m', + elif val1 == 2: + if val2 == 1: + print u'\u001b[42m\u0020\u001b[0m', + elif val2 == 2: + print u'\u001b[42m\u001b[30m3\u001b[0m', + # Link in a2, but not a + elif val1 == 0 and val2 > 0: + if val2 == 1: + # Yellow box + print u'\u001b[1m\u001b[43m\u0020\u001b[0m', + elif val2 == 2: + # Artificial link by transitive closure + print u'\u001b[43m\u001b[30m2\u001b[0m', + + # 
Link in a, but not a2 + elif val1 > 0 and val2 == 0: + if val1 == 1: + # Blue box + print u'\u001b[1m\u001b[44m\u0020\u001b[0m', + elif val1 == 2: + print u'\u001b[44m\u001b[37m1\u001b[0m', + print f_words[i] + nextDefault = sentenceNumber + 1 + sys.stdout.write("Enter next alignment number or 'q' to quit [%d]: " %(nextDefault)) + user_input = sys.stdin.readline().strip() + if user_input == "": + nextRequested = nextDefault + elif user_input[0] == "q" or user_input == "quit": + sys.exit(1) + else: + try: + nextRequested = int(user_input) + except: + nextRequested = sentenceNumber + 1 + sys.stdout.write("Unknown alignment id: %s\nContinuing with %d.\n" %(user_input, nextRequested)) + +a_file.close() +e_file.close() +f_file.close() + diff --git a/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.idx b/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.idx new file mode 100644 index 0000000000000000000000000000000000000000..7e594515c7536841efa4a28dba5b36a952479936 Binary files /dev/null and b/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.idx differ diff --git a/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.idx b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.idx new file mode 100644 index 0000000000000000000000000000000000000000..823698b631100534e33ef1d799f0a23ef1af24f1 Binary files /dev/null and b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.idx differ diff --git a/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srctree.wa b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srctree.wa new file mode 100644 index 0000000000000000000000000000000000000000..6bdbb6e133292f3fae838a95eb49e04c76051781 Binary files /dev/null and 
b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srctree.wa differ diff --git a/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srcvoc b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srcvoc new file mode 100644 index 0000000000000000000000000000000000000000..c958b8c828c5b16c59c5d1da4a6abcbf80517dc0 --- /dev/null +++ b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srcvoc @@ -0,0 +1,9 @@ +8 domando +7 del +6 lados +5 ambos +4 en +3 políticos +2 los +1 a +0 Atlántico diff --git a/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtvoc b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtvoc new file mode 100644 index 0000000000000000000000000000000000000000..cad1a43448d9baf21abc4cd8d1d1df2c466860d9 --- /dev/null +++ b/mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtvoc @@ -0,0 +1,4432 @@ +4431 world-class +4430 starvation +4429 politician-proof +4428 unrest +4427 slide +4426 moves +4425 occupation +4424 conscience +4423 detained +4422 Political +4421 • +4420 ances +4419 youthful +4418 writings +4417 would-be +4416 wisely +4415 wild +4414 waste +4413 wage +4412 rules +4411 volts +4410 violent +4409 violators +4408 villains +4407 vernacular +4406 valuable +4405 utilities +4404 users +4403 unrelenting +4402 unpredictable +4401 universal +4400 unarrested +4399 tyrants +4398 twin +4397 twenty-four +4396 tribunals +4395 treasury +4394 comply +4393 trawlers +4392 travel +4391 translate +4390 transaction +4389 traffic +4388 traders +4387 trade-distorting +4386 topics +4385 tolerant +4384 ties +4383 aged +4382 theorists +4381 criticism +4380 constitutive +4379 warring +4378 vital +4377 victorious +4376 trafficked +4375 floors +4374 trading +4373 commons +4372 territorial +4371 table +4370 
stipulated +4369 shelling +4368 sex +4367 seeds +4366 science +4365 rumor +4364 rug +4363 spectacles +4362 river +4361 residential +4360 pools +4359 relevant +4358 regions +4357 regional +4356 realization +4355 profit +4354 principle +4353 police +4352 pipeline +4351 perils +4350 parliamentary +4349 painful +4348 outrages +4347 reaches +4346 outer +4345 ? +4344 organizational +4343 organization +4342 occurrence +4341 noise +4340 multitude +4339 modified +4338 misery +4337 miraculous +4336 minimum +4335 management +4334 tail +4333 series +4332 lowest +4331 lips +4330 lasting +4329 interim +4328 initial +4327 implication +4326 imagination +4325 humanities +4324 hemisphere +4323 heartland +4322 gullible +4321 governors +4320 globe +4319 generation +4318 forest +4317 forecasts +4316 fat +4315 fastest +4314 expense +4313 expanded +4312 exciting +4311 exceedingly +4310 ears +4309 drastic +4308 deployment +4307 departing +4306 decades-old +4305 crucial +4304 continent +4303 consequences +4302 comedy +4301 brunt +4300 text +4299 best-performing +4298 attendant +4297 assistance +4296 aging +4295 UAE +4294 SADC +4293 Priority +4292 Poles +4291 Economy +4290 Liberal +4289 Jerusalem +4288 ICTY +4287 Ganges +4286 FSA +4285 Britannica +4284 Encyclopedia +4283 lobbyists +4282 DPJ +4281 Party +4280 Communist +4279 CDS +4278 1996 +4277 tested +4276 tempt +4275 teams +4274 teaching +4273 tanks +4272 talent-friendly +4271 tackled +4270 tackle +4269 systems +4268 switch +4267 swinging +4266 swimming +4265 surprise +4264 supervising +4263 supervise +4262 suicide +4261 sufferers +4260 successors +4259 successor +4258 successes +4257 subsidized +4256 subsidies +4255 subsequent +4254 subjects +4253 strike +4252 stretch +4251 stemming +4250 state-funded +4249 sprawling +4248 spent-fuel +4247 spawned +4246 somehow +4245 solving +4244 software +4243 smooth +4242 shocks +4241 shareholder +4240 sexuality +4239 servicing +4238 serf-owning +4237 separates +4236 sentiments +4235 self-employment 
+4234 securities +4233 sectors +4232 secrets +4231 second-round +4230 second-generation +4229 sclerotic +4228 scientists +4227 scientific +4226 schemes +4225 scenarios +4224 saving +4223 sanctioning +4222 salty +4221 rulers +4220 waves +4219 route +4218 flag +4217 rooms +4216 roll +4215 rocket +4214 roads +4213 rewarded +4212 retirees +4211 retained +4210 retain +4209 retail +4208 restrict +4207 restaurants +4206 residents +4205 unsuccessful +4204 render +4203 removed +4202 relying +4201 regulatory +4200 regimens +4199 refugees +4198 red +4197 records +4196 recording +4195 recipients +4194 recalcitrant +4193 rebel +4192 reason +4191 rating +4190 raters +4189 random +4188 radicals +4187 quality-management +4186 pundits +4185 providers +4184 protesters +4183 prospective +4182 ownership +4181 property +4180 projects +4179 profitable +4178 producers +4177 procedures +4176 probable +4175 privately +4174 prisoners +4173 prescribed +4172 premature +4171 practitioners +4170 reckoning +4169 postponing +4168 possibly +4167 moods +4166 popular +4165 pool +4164 pony-tailed +4163 polar +4162 plunges +4161 plastic +4160 planes +4159 picked +4158 periodic +4157 peoples +4156 payouts +4155 payment +4154 payers +4153 parliaments +4152 parliamentarians +4151 papers +4150 packaged +4149 overly +4148 outstanding +4147 outcomes +4146 opponents +4145 biggest +4144 reaping +4143 sacrosanct +4142 oligarchs +4141 old-fashioned +4140 old-age +4139 officers +4138 offers +4137 arises +4136 humanity +4135 Exchequer +4134 shaping +4133 oceans +4132 observers +4131 obscure +4130 numerical +4129 now-plummeting +4128 revolutionary +4127 non-scientific +4126 non-democracies +4125 non-NATO +4124 newspaper +4123 low-energy +4122 network +4121 natural +4120 flags +4119 museums +4118 multiple-family +4117 multilateral +4116 movements +4115 motor +4114 motives +4113 motherhood +4112 scenario +4111 optimistic +4110 mosquito +4109 mortgage +4108 moral +4107 mood +4106 money-market +4105 moderated +4104 
model +4103 mixed +4102 mislead +4101 minority +4100 migrants +4099 middle-income +4098 microbes +4097 messier +4096 memories +4095 medieval +4094 barrage +4093 mechanics +4092 market-neutral +4091 march +4090 manufacturers +4089 mandatory +4088 managers +4087 managerial +4086 youth +4085 male +4084 medical +4083 worse +4082 lowering +4081 low-density +4080 loose +4079 longstanding +4078 long-terms +4077 simmering +4076 lofty +4075 lobby +4074 lobbies +4073 links +4072 limits +4071 libertarian +4070 liberate +4069 liberals +4068 lenders +4067 legitimate +4066 legislators +4065 learning +4064 lawyers +4063 lawmakers +4062 knowledge +4061 knit +4060 junk +4059 partners +4058 judicial +4057 jihadis +4056 constituent +4055 isolate +4054 intravenous +4053 interpret +4052 interest-rate +4051 interact +4050 intelligence +4049 intellectual +4048 instant +4047 injured +4046 industrial-country +4045 individuals +4044 firms +4043 increases +4042 lending +4041 incomes +4040 improved +4039 import +4038 impending +4037 migrant +4036 ill-educated +4035 identity +4034 idealized +4033 humanitarians +4032 human-rights +4031 home-lending +4030 values +4029 holders +4028 musical +4027 hip +4026 hikes +4025 high-income +4024 hearts +4023 handling +4022 goods +4021 guys +4020 gridlock +4019 graduates +4018 gods +4017 public-goods +4016 genes +4015 generals +4014 gas-guzzling +4013 garbage +4012 fundamental +4011 frightened +4010 free-trade +4009 fourth +4008 owners +4007 forests +4006 foods +4005 food-price +4004 food +4003 flows +4002 streams +4001 flowing +4000 floating +3999 fixed +3998 fish +3997 types +3996 finance +3995 filings +3994 fighters +3993 feelings +3992 farm +3991 family-planning +3990 fading +3989 experts +3988 experimental +3987 expatriate +3986 Last +3985 exorcising +3984 exclude +3983 exchange-rate +3982 excessive +3981 exaggerated +3980 evacuate +3979 ethnically +3978 essence +3977 espoused +3976 escaping +3975 escalating +3974 equity +3973 entrepreneurs +3972 
engines +3971 energy-efficient +3970 endless +3969 enclaves +3968 flexibility +3967 employers +3966 employees +3965 empires +3964 embraced +3963 fraud +3962 electoral +3961 eating +3960 earnings +3959 duties +3958 run-up +3957 drugs +3956 dreams +3955 downside +3954 donor +3953 donating +3952 alternatives +3951 produced +3950 domestically +3949 distressing +3948 disregard +3947 displaced +3946 disease +3945 discussions +3944 discoveries +3943 dipped +3942 die-hard +3941 dictators +3940 dictates +3939 diagnosed +3938 devils +3937 developments +3936 detainee +3935 destroyed +3934 designing +3933 deposits +3932 deny +3931 defunct +3930 fears +3929 deflation +3928 deepwater +3927 decreased +3926 makers +3925 death-penalty +3924 dealt +3923 damages +3922 customers +3921 curtailing +3920 cry +3919 heads +3918 crowned +3917 searched +3916 investigators +3915 credits +3914 creditors +3913 crack +3912 surrender +3911 corporate +3910 propositions +3909 component +3908 controlling +3907 control +3906 contract +3905 continuity +3904 Duhalde +3903 contacts +3902 consuming +3901 consumer-confidence +3900 consumer +3899 consultant +3898 confused +3897 conflicts +3896 concerts +3895 comprehensive +3894 communist +3893 commercial +3892 cohorts +3891 coastal +3890 coal-state +3889 clubs +3888 club +3887 clinicians +3886 clever +3885 critics +3884 clearly +3883 clean +3882 civilians +3881 engineering +3880 civil +3879 civic +3878 city +3877 child +3876 cheap +3875 charitable-campaign +3874 channel +3873 championships +3872 inalienable +3871 centrifuge +3870 central-bank +3869 caution +3868 problem +3867 casts +3866 cars +3865 capture +3864 candidate +3863 camps +3862 campaigners +3861 murderous +3860 shops +3859 coffee +3858 health-services +3857 consumers +3856 buyers +3855 three +3854 sovereign +3853 businessmen +3852 bursting +3851 burn +3850 budget +3849 litigation +3848 bread +3847 brand-name +3846 brain +3845 bought +3844 borrowing +3843 border-enforcement +3842 boosting +3841 
books +3840 bonds +3839 bond-market +3838 bond +3837 bombers +3836 bodies +3835 boards +3834 black-box +3833 bilateral +3832 swings +3831 besieged +3830 peak +3829 belongs +3828 five +3827 designated +3826 distorted +3825 smaller +3824 beans +3823 tuned +3822 punished +3821 mining +3820 blinded +3819 bankers +3818 sheets +3817 extremists +3816 baby +3815 babies +3814 awash +3813 regimes +3812 audit +3811 attachments +3810 seams +3809 art +3808 assurances +3807 asset +3806 boys +3805 arrangement +3804 triggered +3803 aren +3802 approving +3801 appropriate +3800 approaches +3799 appeals +3798 mutual +3797 religious +3796 programs +3795 specimen +3794 skeletons +3793 have-nots +3792 products +3791 dairy +3790 ambitious +3789 tempting +3788 always +3787 contracts +3786 revoke +3785 commitments +3784 insults +3783 alleging +3782 sensitivity +3781 aides +3780 agreed-upon +3779 advances +3778 advanced-country +3777 administrations +3776 adjustments +3775 education +3774 primary +3773 transparency +3772 accountability +3771 addresses +3770 activated +3769 outcome +3768 accountable +3767 abuses +3766 stunning +3765 Yemen +3764 We +3763 Venezuela +3762 Ukrainian +3761 UN +3760 U.S. 
+3759 Tricks +3758 Times +3757 Three +3756 Places +3755 Holy +3754 TV +3753 Sunni +3752 States-based +3751 States-China +3750 Spanish +3749 Spain +3748 Socialists +3747 Sick +3746 Shi +3745 SSA +3744 Republican +3743 Rates +3742 Polish +3741 Peruvian +3740 Pay +3739 Problems +3738 OECD +3737 New +3736 Net +3735 Muslim +3734 Moldova +3733 Mental +3732 Mau +3731 Markets +3730 MDG +3729 Los +3728 Latins +3727 Jordanian +3726 Jordan +3725 Jews +3724 maritime +3723 Islamists +3722 Islamic-minded +3721 Islamic +3720 Iranian +3719 Inter-Services +3718 Igbo +3717 Humans +3716 Hu +3715 Hirst +3714 Himalayan +3713 Hatoyama +3712 Green +3711 Greek-style +3710 Germanys +3709 Gaza +3708 GDP-linked +3707 Funds +3706 Federal +3705 Dutch +3704 Disorders +3703 Countries +3702 Developing +3701 Democrats +3700 Democratic +3699 D. +3698 Constitutional +3697 Constitution +3696 Confucian +3695 Club +3694 Take +3693 Children +3692 Chiang +3691 Chechen +3690 Catherine +3689 Brazilians +3688 Bosnia +3687 Azerbaijan +3686 Awards +3685 Asians +3684 Arab +3683 Animals +3682 Republicans +3681 Alpine +3680 Agreements +3679 enzymes +3678 APP +3677 inhibiting +3676 A-peptide +3675 students +3674 47 +3673 professionals +3672 160 +3671 .. 
+3670 1860 +3669 implicitly +3668 Nehru +3667 Muslims +3666 core +3665 telephones +3664 industrial +3663 transport +3662 innovation +3661 handicapped +3660 imbalances +3659 obscures +3658 foreseeing +3657 neither +3656 autos +3655 los +3654 sovereign-debt +3653 prices +3652 receiving +3651 dominate +3650 gamma-secretase +3649 beta-secretase +3648 slogan +3647 Yushchenko +3646 cited +3645 Wikipedia +3644 contain +3643 calculations +3642 spiritual +3641 sleek +3640 restraints +3639 population +3638 highways +3637 labeled +3636 gross +3635 fortunes +3634 mean +3633 alike +3632 sins +3631 politician +3630 mainstream +3629 extremist +3628 Macedonian +3627 écoles +3626 worn +3625 working-class +3624 woman +3623 ministry +3622 diplomacy +3621 wishing +3620 whole +3619 whiff +3618 wherever +3617 whereby +3616 life-threatening +3615 coordination +3614 convicts +3613 welcome +3612 weight +3611 wear +3610 watched +3609 warts +3608 wars +3607 war +3606 vouchers +3605 voted +3604 vindicated +3603 vicinity +3602 uttered +3601 usefully +3600 usage +3599 urban +3598 upstream +3597 upheld +3596 upheaval +3595 unstuck +3594 modern +3593 uniquely +3592 underscored +3591 underground +3590 truly +3589 tree +3588 treading +3587 transit +3586 traction +3585 traced +3584 tour +3583 turns +3582 stick +3581 recognize +3580 high-security +3579 vigor +3578 areas +3577 establish +3576 diminish +3575 boil +3574 thus +3573 threatened +3572 thinking +3571 underpinnings +3570 term +3569 temple +3568 television +3567 teeth +3566 technologically +3565 targets +3564 tankers +3563 talks +3562 tagged +3561 sweeping +3560 supported +3559 summit +3558 warrant +3557 sufficient +3556 successful +3555 succeeded +3554 stumbled +3553 student +3552 stuck +3551 struggling +3550 stronger +3549 strength +3548 strategically +3547 stories +3546 stolen +3545 vilify +3544 stiffen +3543 stays +3542 stay +3541 stationed +3540 station +3539 start-up +3538 standards +3537 stance +3536 stalled +3535 stakes +3534 staged 
+3533 stage +3532 squarely +3531 spoken +3530 split +3529 spectacularly +3528 specifically +3527 specific +3526 speaking +3525 sorts +3524 somewhat +3523 sometimes +3522 sometime +3521 sold +3520 soft +3519 soccer +3518 volume +3517 smashed +3516 slow +3515 slippery +3514 slightly +3513 slashed +3512 sitting +3511 sinking +3510 similar +3509 heightened +3508 significantly +3507 shows +3506 shot +3505 shop +3504 stood +3503 shares +3502 shape +3501 sex-crime +3500 seriously +3499 sensitive +3498 sensing +3497 sense +3496 displays +3495 self-serving +3494 self-interested +3493 self-fulfilling +3492 seized +3491 seismic +3490 secured +3489 secretly +3488 secret +3487 scrutinized +3486 sciences +3485 scattered +3484 scandals +3483 savings +3482 save +3481 sat +3480 worked +3479 sanctioned +3478 salient +3477 sales +3476 safely +3475 runs +3474 row +3473 rosy +3472 ring +3471 rigorously +3470 rigged +3469 rig +3468 rich +3467 rewarding +3466 revised +3465 revealed +3464 ill-gotten +3463 retire +3462 resulting +3461 responses +3460 responding +3459 resource-poor +3458 reside +3457 requests +3456 reputation +3455 represented +3454 repetition +3453 remember +3452 remains +3451 uneducated +3450 relatively +3449 regarded +3448 recognized +3447 recalls +3446 reasons +3445 realized +3444 re-established +3443 ranked +3442 rampant +3441 raised +3440 quoted +3439 quit +3438 quipped +3437 purpose +3436 proxies +3435 provoked +3434 prove +3433 prospects +3432 proposition +3431 properties +3430 prominent +3429 pro-independence +3428 pro-consumption +3427 recipient +3426 privately-owned +3425 prevailing +3424 prevail +3423 pretty +3422 preceded +3421 post +3420 possesses +3419 positions +3418 posing +3417 populate +3416 poll +3415 politically +3414 point-of-care +3413 poem +3412 plot +3411 plea +3410 plays +3409 players +3408 planned +3407 piece +3406 petition +3405 pervasive +3404 performed +3403 penalties +3402 patterns +3401 patronage +3400 partner +3399 participating +3398 
participants +3397 paralyzed +3396 paradoxically +3395 paper +3394 pale +3393 upbringing +3392 overthrown +3391 outset +3390 outright +3389 out-perform +3388 organized +3387 procedure +3386 operating +3385 opened +3384 reverse +3383 senses +3382 strains +3381 earn +3380 studies +3379 overseas +3378 weaker +3377 matters +3376 entitlement +3375 company +3374 balance +3373 allegations +3372 Earth +3371 Banking +3370 officially +3369 King +3368 secession +3367 net +3366 extra +3365 evaporated +3364 occurs +3363 occasionally +3362 occasional +3361 nuts +3360 visible +3359 surging +3358 notably +3357 normally +3356 nativist +3355 national-level +3354 nation +3353 narrowly +3352 sell +3351 muscles +3350 mosque +3349 mortality +3348 stress +3347 months +3346 month +3345 moderate +3344 modeled +3343 mode +3342 mistaken +3341 mistake +3340 mentioned +3339 measured +3338 material +3337 mass +3336 marine +3335 manufacturing +3334 manufactured +3333 maneuvered +3332 malaise +3331 lured +3330 losers +3329 loosely +3328 looming +3327 lodge +3326 location +3325 located +3324 lobbying +3323 stream +3322 parrot +3321 lining +3320 light +3319 rapidly +3318 learned +3317 leader +3316 lawsuits +3315 principles +3314 criteria +3313 land +3312 laid +3311 lacked +3310 kingpins +3309 kinds +3308 jobs +3307 isolated +3306 … +3305 stability +3304 threatening +3303 rarely +3302 permitted +3301 patterned +3300 irritants +3299 involvement +3298 inviting +3297 investments +3296 invested +3295 invest +3294 invaded +3293 intrusion +3292 one-sided +3291 national +3290 low +3289 quarter +3288 initiative +3287 action +3286 wind +3285 currents +3284 design +3283 intervention +3282 liquidity +3281 intent +3280 promising +3279 installed +3278 inspired +3277 insofar +3276 insisting +3275 insisted +3274 pledging +3273 mutually +3272 insight +3271 insiders +3270 inroads +3269 rights +3268 inherit +3267 inhabit +3266 task +3265 incurred +3264 incumbents +3263 urgent +3262 uncertainty +3261 transfer +3260 
tranches +3259 tradable +3258 quarter-century +3257 driver +3256 dock +3255 context +3254 stripping +3253 stark +3252 stable +3251 soil +3250 � +3249 socially +3248 sharing +3247 several +3246 restraint +3245 recorded +3244 protecting +3243 propping +3242 preference +3241 practical +3240 organizations +3239 obtaining +3238 uncertain +3237 neighboring +3236 millennia +3235 sheer +3234 interpreting +3233 gaining +3232 formerly +3231 feeling +3230 ensuring +3229 conflict +3228 storage +3227 cold +3226 calling +3225 averted +3224 agriculture +3223 advancing +3222 advanced +3221 Australia +3220 portfolio +3219 occupying +3218 guise +3217 democratic +3216 week +3215 Vienna +3214 Rwanda +3213 amp +3212 R +3211 Pristina +3210 Panama +3209 Mexican +3208 Bolivia +3207 Managua +3206 July +3205 Gilani +3204 Ethiopia +3203 Central +3202 2009 +3201 despite +3200 1933 +3199 [ +3198 improvements +3197 impresario +3196 implies +3195 implemented +3194 imperiled +3193 images +3192 illustrate +3191 identified +3190 identifiable +3189 hyphenated +3188 hydroelectric +3187 hunts +3186 housing +3185 houses +3184 horrors +3183 hope +3182 homeland +3181 holdings +3180 history +3179 historical +3178 highs +3177 highlighted +3176 apartment +3175 hidden +3174 hesitate +3173 lies +3172 herein +3171 helpful +3170 heed +3169 hearing +3168 headway +3167 never +3166 havoc +3165 unleashed +3164 thoughts +3163 risen +3162 occurred +3161 hat +3160 harrowing +3159 hardship +3158 happens +3157 happened +3156 handed +3155 restored +3154 participated +3153 guerrilla +3152 groups +3151 grounds +3150 underestimate +3149 greatly +3148 defining +3147 grave +3146 governing +3145 goes +3144 glib +3143 laughs +3142 favorable +3141 generally +3140 pressures +3139 funds +3138 functions +3137 fulfills +3136 fuel +3135 owning +3134 excess +3133 frenzy +3132 free-riding +3131 founded +3130 myself +3129 fought +3128 formed +3127 formative +3126 formation +3125 foremost +3124 forego +3123 votes +3122 flown +3121 
flourish +3120 flock +3119 flat +3118 flares +3117 fire +3116 finishing +3115 fine +3114 purchase +3113 financially +3112 fill +3111 fifth +3110 fiasco +3109 fed +3108 features +3107 fashion +3106 farms +3105 poorly +3104 particularly +3103 faring +3102 fame +3101 failures +3100 failing +3099 factor +3098 incapacitated +3097 ruler +3096 facto +3095 facility +3094 exterminated +3093 extensively +3092 expression +3091 expressed +3090 lane +3089 express +3088 exploring +3087 exploits +3086 explaining +3085 explain +3084 experienced +3083 watches +3082 expensive +3081 expenditures +3080 populations +3079 expanding +3078 existence +3077 exist +3076 examined +3075 ever-rising +3074 events +3073 eurozone +3072 estate +3071 established +3070 essentially +3069 warm +3068 erupted +3067 entirely +3066 enterprise +3065 enraged +3064 engine +3063 engaged +3062 enforced +3061 ended +3060 enacted +3059 employing +3058 employed +3057 emphasizing +3056 emphasized +3055 stranglehold +3054 emerges +3053 embryonic +3052 embarrassingly +3051 elsewhere +3050 else +3049 elections +3048 echoed +3047 relationships +3046 drivers +3045 dramatically +3044 double-digit +3043 dominated +3042 dominant +3041 dollars +3040 dollar-denominated +3039 doctrine +3038 disturbances +3037 distressed +3036 disputed +3035 dispute +3034 display +3033 discovery +3032 discontinued +3031 disappointment +3030 diplomatic +3029 dioxide +3028 differently +3027 differences +3026 difference +3025 died +3024 dictatorships +3023 devices +3022 developing-country +3021 developed +3020 depicting +3019 depends +3018 demonstrations +3017 demonstrates +3016 demonstrated +3015 democracy +3014 degree +3013 definitely +3012 defeated +3011 decaying +3010 decayed +3009 debt-holders +3008 debating +3007 damned +3006 cynical +3005 revenues +3004 customs +3003 underway +3002 debated +3001 crowd +3000 cross-country +2999 critically +2998 credibility +2997 counted +2996 costs +2995 cost-effective +2994 converted +2993 code +2992 
ethical +2991 contested +2990 contending +2989 contemplating +2988 constructed +2987 constrain +2986 consisting +2985 considerably +2984 consensus +2983 confident +2982 confidence +2981 composites +2980 composed +2979 competitiveness +2978 comparatively +2977 repeated +2976 commonly +2975 common +2974 heights +2973 commanding +2972 clustered +2971 clings +2970 class +2969 govern +2968 circumstances +2967 circulating +2966 choice +2965 cheering +2964 charred +2963 changeover +2962 changed +2961 chances +2960 chairs +2959 chair +2958 certainly +2957 centrist +2956 catchy +2955 casually +2954 cast- +2953 cash +2952 capital +2951 pose +2950 campaign +2949 camp +2948 vagaries +2947 turbines +2946 Inter-Governmental +2945 election +2944 bungling +2943 bulls +2942 bubble +2941 broke +2940 understood +2939 broadly +2938 broadcast +2937 broad +2936 brew +2935 bounds +2934 boosted +2933 demand +2932 booming +2931 boomed +2930 bombings +2929 bombed +2928 body +2927 blue +2926 besides +2925 believer +2924 interested +2923 behavior +2922 settled +2921 becomes +2920 increasingly +2919 hardened +2918 traded +2917 shared +2916 marching +2915 cumbersome +2914 neglected +2913 charge +2912 folded +2911 equivalent +2910 enhanced +2909 concentrated +2908 compensated +2907 primarily +2906 bars +2905 banned +2904 bankrupt +2903 ball +2902 burned +2901 background +2900 backed +2899 normal +2898 haunt +2897 awe +2896 averaged +2895 fertility +2894 height +2893 precisely +2892 noon +2891 assurance +2890 associations +2889 associates +2888 assaulted +2887 waged +2886 provisional +2885 polling +2884 hosting +2883 distinct +2882 armed +2881 arisen +2880 arise +2879 arguments +2878 argued +2877 argue +2876 situation +2875 strictly +2874 nowhere +2873 mounting +2872 deployed +2871 appropriations +2870 appointment +2869 appointees +2868 appeared +2867 anytime +2866 transition +2865 anxiety +2864 anti-war +2863 anti-shareholders +2862 announced +2861 transformed +2860 wealthier +2859 extreme +2858 
forth +2857 anchored +2856 analysis +2855 extent +2854 amidst +2853 amid +2852 questioned +2851 estrangement +2850 alone +2849 alleged +2848 wrapped +2847 ala +2846 gonna +2845 ain +2844 aim +2843 agreement +2842 aggression +2841 sentiment +2840 aggravate +2839 agents +2838 ubiquitous +2837 heating +2836 twenty +2835 affairs +2834 advised +2833 adverse +2832 adjusted +2831 adequate +2830 ad +2829 actors +2828 active +2827 acknowledging +2826 accounts +2825 according +2824 absolutely +2823 absent +2822 Ai +2821 abide +2820 abandoning +2819 staple +2818 plant +2817 re-bar +2816 period +2815 discussion +2814 panel +2813 determining +2812 large-scale +2811 portion +2810 chord +2809 deep +2808 culture +2807 shortage +2806 butter +2805 YouTube +2804 Yes +2803 twice +2802 congratulated +2801 Yanukovich +2800 When +2799 Washington-based +2798 equities +2797 engagement +2796 Thailand +2795 Technology +2794 Summit +2793 Setting +2792 Secretariat +2791 Bottom +2790 Room +2789 One +2788 everywhere +2787 Nigerians +2786 Misguided +2785 Middle +2784 Man +2783 MacAskill +2782 Left +2781 Laboratory +2780 Kremlin +2779 _ +2778 Servan +2777 Jacques +2776 Iraqi +2775 later +2774 prevent +2773 Hezbollah +2772 HK +2771 Gallipoli +2770 benefited +2769 regularly +2768 Friends +2767 Facility +2766 Facebook +2765 meeting +2764 Transformation +2763 Environmental +2762 EU-wide +2761 curb +2760 rates +2759 constrained +2758 Degradation +2757 DNA +2756 rulings +2755 Coaster +2754 Economist +2753 Cambridge +2752 born +2751 Brazil +2750 Authority +2749 Armstrong +2748 Afghan +2747 Action +2746 A +2745 D +2744 47th +2743 350,000 +2742 1990 +2741 1973 +2740 1787 +2739 What +2738 declining +2737 steadily +2736 jeopardized +2735 authored +2734 partly +2733 writing +2732 thrashed +2731 therefore +2730 study +2729 schools +2728 risking +2727 resonates +2726 reported +2725 confirm +2724 letting +2723 indeed +2722 developing +2721 however +2720 protestors +2719 consideration +2718 especially +2717 
concluding +2716 opportunities +2715 buried +2714 outward +2713 constituted +2712 shopping +2711 influence +2710 belatedly +2709 albeit +2708 cultivating +2707 co-authored +2706 PiS +2705 1994 +2704 PA +2703 MDGs +2702 gun +2701 couples +2700 Nordic +2699 Bird +2698 requires +2697 bearer +2696 standard +2695 xenophobic +2694 significant +2693 self-interest +2692 predicament +2691 discussed +2690 embattled +2689 edge +2688 clashing +2687 absence +2686 In +2685 'll +2684 promise +2683 ! +2682 taming +2681 ‘ +2680 ´ +2679 yields +2678 write +2677 wipe +2676 whistle +2675 weighted +2674 weak +2673 wave +2672 water +2671 wake +2670 voter +2669 vigilantes +2668 upside +2667 unique +2666 underlying +2665 twentieth-century +2664 triumph +2663 trader +2662 toxic +2661 today +2660 theme +2659 wrong +2658 bureau +2657 weather +2656 upper +2655 tried +2654 territory +2653 slack +2652 extension +2651 requisite +2650 prior +2649 renminbi +2648 reality +2647 project +2646 profits +2645 few +2644 nearest +2643 adopted +2642 measures +2641 maximum +2640 grab-bag +2639 family +2638 event +2637 electorate +2636 deliberate +2635 cause +2634 Review +2633 PLA +2632 Liberation +2631 ERM +2630 Clinton +2629 Chairman +2628 16th +2627 educational +2626 tertiary +2625 teacher +2624 sustains +2623 supplies +2622 steel +2621 spectrum +2620 specter +2619 speaker +2618 soul +2617 sophisticated +2616 smoking +2615 slightest +2614 sleeping +2613 skewed +2612 site +2611 signs +2610 showed +2609 shock +2608 seventeenth-century +2607 session +2606 ruled +2605 revolution +2604 revival +2603 revise +2602 returns +2601 rest +2600 responsible +2599 respect +2598 resignation +2597 resetting +2596 report +2595 reforms +2594 reform +2593 reflection +2592 reductions +2591 recognition +2590 reclaim +2589 rank +2588 ranging +2587 rally +2586 racist +2585 quality +2584 purchased +2583 public-sector +2582 proposal +2581 prognosis +2580 pro-trade +2579 pride +2578 pricing +2577 agreements +2576 previous +2575 
prevalence +2574 presence +2573 practicing +2572 practices +2571 awaits +2570 unions +2569 powerful +2568 store +2567 politics +2566 : +2565 plausibility +2564 perpetrators +2563 peasant +2562 summer +2561 passive +2560 passing +2559 participate +2558 parcel +2557 outside +2556 outreach +2555 output-growth +2554 output +2553 Germans +2552 indirect +2551 effects +2550 onset +2549 one-seventh +2548 eve +2547 official +2546 constitutes +2545 decades +2544 surplus +2543 collective +2542 street +2541 raw +2540 murder +2539 formal +2538 swapping +2537 debt +2536 currency +2535 Zacharias +2534 House +2533 Turkish +2532 Safe +2531 Fund +2530 Monetary +2529 High +2528 Financial +2527 China-Japan +2526 Century +2525 emerged +2524 monopolist +2523 suspicion +2522 surgical +2521 sub-Saharan +2520 staff +2519 radical +2518 surface +2517 lost +2516 ten +2515 living +2514 health +2513 groundwater +2512 famous +2511 dynastic +2510 diagnosing +2509 curse +2508 reef +2507 coral +2506 committing +2505 epochal +2504 things +2503 Treasury +2502 The +2501 Silvio +2500 Sarkozy +2499 Poland +2498 Philippines-China +2497 Palestinian +2496 Nigerian +2495 Latin +2494 LDP +2493 Hong +2492 Hitler +2491 University +2490 Harvard +2489 Haiti +2488 National +2487 Gross +2486 Cuba +2485 dictator +2484 Chile +2483 Army +2482 11 +2481 1 +2480 ocean +2479 rot +2478 obvious +2477 object +2476 nuclear +2475 northern +2474 norm +2473 nonetheless +2472 rush +2471 networks +2470 nature +2469 terrorist +2468 narrow +2467 mouthpiece +2466 moisture +2465 modify +2464 ministerial +2463 present +2462 minders +2461 mid-1990 +2460 memorandum +2459 materials +2458 market-timing +2457 markedly +2456 ­ +2455 manifes +2454 manages +2453 machine +2452 lower +2451 lot +2450 legislative +2449 legally +2448 justice +2447 junta +2446 EU +2445 reinforcement +2444 interwar +2443 integral +2442 ingredients +2441 indicates +2440 passes +2439 incumbent +2438 region +2437 emerging +2436 newly +2435 3 +2434 kind +2433 importance 
+2432 implementation +2431 costly +2430 host +2429 horizons +2428 wife +2427 third +2426 hinged +2425 heralded +2424 heels +2423 ratified +2422 group +2421 governor +2420 governmental +2419 parties +2418 global +2417 generated +2416 game +2415 gains +2414 addressed +2413 percent +2412 full +2411 Indian +2410 exports +2409 oil +2408 inside +2407 cradle +2406 bailouts +2405 forms +2404 formats +2403 foreign-exchange +2402 without +2401 forces +2400 works +2399 school +2398 pedestrians +2397 football +2396 foot +2395 folk +2394 nurturing +2393 flash +2392 first-quarter +2391 Nations +2390 financing +2389 financial-sector +2388 fate +2387 growth +2386 faster +2385 exempt +2384 exchange +2383 misstep +2382 area +2381 euro +2380 ethanol +2379 enter +2378 enforcement +2377 insurance +2376 encourages +2375 economy +2374 economists +2373 eastern +2372 bird +2371 drove +2370 drops +2369 drives +2368 draft +2367 doesn +2366 documents +2365 dissolution +2364 devoid +2363 desires +2362 descend +2361 department +2360 deniers +2359 delta +2358 del +2357 deeper +2356 environment +2355 damaging +2354 cuts +2353 cultural +2352 crises +2351 countrys +2350 counterpart +2349 countered +2348 lingering +2347 corner +2346 inherited +2345 copy +2344 consistent +2343 confronts +2342 confines +2341 concession +2340 compulsory +2339 commit +2338 collections +2337 collapsed +2336 collapse +2335 coincided +2334 coal +2333 co-chairs +2332 change +2331 climate +2330 circle +2329 chapter +2328 championed +2327 celebrations +2326 caused +2325 carrot +2324 devastating +2323 cabinet +2322 late +2321 facts +2320 daily +2319 deeds +2318 burst +2317 built +2316 buildup +2315 undergo +2314 cancer +2313 brand +2312 branches +2311 booms +2310 meetings +2309 blended +2308 notions +2307 vague +2306 beneficiary +2305 believe +2304 priced +2303 below +2302 offset +2301 basin +2300 barrel +2299 applied +2298 balances +2297 Greece +2296 opposite +2295 fault +2294 director +2293 arrangements +2292 army +2291 arm 
+2290 fueling +2289 dependent +2288 appreciation +2287 answerable +2286 shrink +2285 deficit +2284 Soviet +2283 policies +2282 minds +2281 inland +2280 altogether +2279 allocate +2278 awarding +2277 advocates +2276 adoption +2275 medicine +2274 achieved +2273 acceptance +2272 stake +2271 bet +2270 parody +2269 bailout +2268 salt +2267 grain +2266 large +2265 weakening +2264 weakness +2263 confession +2262 ban +2261 World +2260 Warming +2259 Volga +2258 Treaty +2257 Tier +2256 Tibetan +2255 Thatcherite +2254 Taiwan +2253 Syrian +2252 Statute +2251 State +2250 Spins +2249 South +2248 nationalist +2247 Sinhalese +2246 September +2245 Security +2244 Russian +2243 Rodríguez +2242 Rice +2241 Southeast +2240 Race +2239 laureate +2238 Prize +2237 Population +2236 Prison +2235 President +2234 Power +2233 Palace +2232 Pacific +2231 Ossetian +2230 Oil +2229 Test +2228 Nuclear +2227 Northern +2226 NPT +2225 NEPAD +2224 Millennium +2223 Member +2222 May +2221 Libya +2220 Lebanese +2219 Korea +2218 Kong +2217 Kill +2216 Japanese +2215 Islam +2214 International +2213 IPCC +2212 IMF +2211 Holocaust +2210 Highway +2209 Heights +2208 Haitian +2207 Gulf +2206 Group +2205 W. 
+2204 George +2203 Staff +2202 General +2201 Union +2200 GDP +2199 Spotlight +2198 G-20 +2197 Cold +2196 From +2195 Forum +2194 FSB +2193 Executive +2192 Program +2191 Environment +2190 England +2189 Eastern +2188 liberal +2187 disintegration +2186 Depression +2185 111 +2184 sanctions +2183 imposed +2182 Department +2181 Delta +2180 Danish +2179 DSM +2178 DPP +2177 Cup +2176 Crown +2175 Congress +2174 Community +2173 Committee +2172 Co-Prosperity +2171 Climate +2170 Justice +2169 Chief +2168 Catholic +2167 Campaign +2166 CO2 +2165 Bush +2164 goals +2163 quantifiable +2162 Brown +2161 Gordon +2160 Minister +2159 Prime +2158 Baltic +2157 Asian +2156 Asia +2155 Argentina +2154 Gomes +2153 Ana +2152 Age +2151 affects +2150 ages +2149 ice +2148 temperatures +2147 90 +2146 9 +2145 20 +2144 meantime +2143 whose +2142 presided +2141 including +2140 dividend +2139 popularity +2138 mobilization +2137 / +2136 community +2135 varied +2134 wanting +2133 turmoil +2132 PAP +2131 ECB +2130 CCX +2129 CCP +2128 gilts +2127 bounce +2126 cat +2125 spirits +2124 orders +2123 worst +2122 wartime +2121 usual +2120 traditional +2119 prosecutor +2118 Fatah +2117 recent +2116 rebalancing +2115 proposed +2114 powers +2113 yanked +2112 cable +2111 position +2110 policy +2109 party +2108 own +2107 ongoing +2106 noncompliance +2105 motive +2104 military +2103 lumbering +2102 leadership +2101 largest +2100 fees +2099 tuition +2098 highest +2097 greatest +2096 great +2095 expectation +2094 employment +2093 effective +2092 dire +2091 constitutional +2090 citizens +2089 squad +2088 cheer +2087 bank +2086 central +2085 bosses +2084 bidding +2083 better +2082 avoidance +2081 assertive +2080 agenda +2079 Wings +2078 Old +2077 East +2076 2002 +2075 pink-collar +2074 accounting +2073 respective +2072 expectations +2071 claimed +2070 research +2069 closures +2068 bipartisan +2067 U.S +2066 shores +2065 s +2064 ™ +2063 € +2062 ‑ +2061 la +2060 à +2059 yourself +2058 yes +2057 wound +2056 wedge +2055 drive 
+2054 grew +2053 worries +2052 worried +2051 worldwide +2050 typically +2049 women +2048 shouting +2047 withdrawn +2046 room +2045 investment +2044 viable +2043 widely +2042 observed +2041 substantially +2040 while +2039 loans +2038 weapons +2037 weakened +2036 ways +2035 warfare +2034 want +2033 waited +2032 waging +2031 vote +2030 voice +2029 visited +2028 violations +2027 views +2026 viewed +2025 vice +2024 mouth +2023 word +2022 using +2021 used +2020 bureaucratic +2019 exercise +2018 unwise +2017 unwilling +2016 unwanted +2015 until +2014 unsecured +2013 unlikely +2012 unless +2011 university +2010 undertake +2009 duress +2008 uncooperative +2007 unable +2006 inward +2005 tu +2004 trying +2003 try +2002 truthful +2001 true +2000 trip +1999 treating +1998 transmitted +1997 transmission +1996 trained +1995 train +1994 trade +1993 hopeless +1992 touch +1991 tolerance +1990 drug +1989 toe +1988 fellow +1987 arrests +1986 yield +1985 win +1984 ward +1983 visit +1982 view +1981 venture +1980 vacuum +1979 remix +1978 unite +1977 undermine +1976 treat +1975 transmit +1974 coverage +1973 constituents +1972 idea +1971 creation +1970 struggle +1969 particular +1968 sustain +1967 surge +1966 suit +1965 structure +1964 strongly +1963 strengthen +1962 squeeze +1961 spend +1960 speak +1959 somewhere +1958 signing +1957 show +1956 badly +1955 score +1954 roost +1953 revive +1952 resort +1951 reports +1950 renaissance +1949 redefine +1948 reassess +1947 realize +1946 race +1945 psychiatry +1944 Korean +1943 North +1942 pronounce +1941 prominence +1940 privatize +1939 pour +1938 portable +1937 persist +1936 understanding +1935 inaccessible +1934 obtain +1933 occurring +1932 naturally +1931 authoritarian +1930 manipulate +1929 manage +1928 contributions +1927 lose +1926 long-term +1925 lives +1924 list +1923 less +1922 legitimize +1921 really +1920 know +1919 junior +1918 join +1917 emergence +1916 invite +1915 instructors +1914 insist +1913 innocent +1912 include +1911 decide 
+1910 hear +1909 hark +1908 grips +1907 grant +1906 gradual +1905 good +1904 priorities +1903 former +1902 foreign +1901 fiscal +1900 fend +1899 fare +1898 equip +1897 enhance +1896 endlessly +1895 Ukraine +1894 eligible +1893 dust +1892 drain +1891 dig +1890 designate +1889 deeply +1888 crumble +1887 criminal +1886 courses +1885 cope +1884 contribute +1883 continuing +1882 conducting +1881 combat +1880 changes +1879 catch +1878 carry +1877 buy +1876 burnish +1875 fold +1874 breast +1873 break +1872 borrow +1871 thanks +1870 boom +1869 blockade +1868 bigotry +1867 becoming +1866 worth +1865 finding +1864 won +1863 capable +1862 award +1861 average +1860 assuming +1859 assume +1858 treatment +1857 honest +1856 all-time +1855 al-Qaeda +1854 positive +1853 adopt +1852 actual +1851 accommodate +1850 accept +1849 abandon +1848 legislature +1847 halt +1846 Main +1845 Live +1844 Lebanon +1843 Italy +1842 It +1841 Indonesia +1840 Europe +1839 anyway +1838 5.3 +1837 tied +1836 throughout +1835 threatens +1834 thought +1833 though +1832 think +1831 thing +1830 shut +1829 thereafter +1828 probably +1827 voting +1826 settlements +1825 does +1824 British +1823 underlay +1822 tests +1821 test +1820 tens +1819 tends +1818 whether +1817 tell +1816 techniques +1815 taxing +1814 taxes +1813 targeted +1812 target +1811 tapping +1810 tapered +1809 talking +1808 takes +1807 tables +1806 command +1805 decisions +1804 ensure +1803 switches +1802 sustained +1801 surpasses +1800 surpass +1799 surely +1798 sure +1797 supposedly +1796 supportive +1795 support +1794 supply +1793 sucked +1792 success +1791 succeeds +1790 submit +1789 subject +1788 studied +1787 struck +1786 strive +1785 strengthened +1784 strained +1783 story +1782 still +1781 stepped +1780 stems +1779 headlong +1778 steam +1777 statist +1776 blocks +1775 started +1774 suffer +1773 squeezed +1772 spread +1771 spared +1770 south +1769 sound +1768 sought +1767 sort +1766 soaring +1765 so-called +1764 smiling +1763 slip +1762 
slimmed +1761 single +1760 1881 +1759 simply +1758 similarly +1757 chips +1756 computer +1755 silicon-based +1754 sign +1753 shoots +1752 shooting +1751 shifted +1750 shift +1749 wrote +1748 knows +1747 she +1746 shareholders +1745 share +1744 seven +1743 settlement +1742 setting +1741 serving +1740 threat +1739 senior +1738 send +1737 selling +1736 self-adjusting +1735 self +1734 selected +1733 seen +1732 seeing +1731 second +1730 sea +1729 say +1728 said +1727 saddling +1726 sacked +1725 roughly +1724 rose +1723 role +1722 road +1721 rising +1720 reward +1719 revising +1718 revere +1717 retreat +1716 retired +1715 results +1714 resulted +1713 restore +1712 responsibility +1711 respects +1710 resource +1709 resisted +1708 resistance +1707 resembling +1706 resembled +1705 resemble +1704 replied +1703 replace +1702 reminds +1701 remedy +1700 remarkable +1699 remained +1698 troubled +1697 remain +1696 rely +1695 relinquish +1694 relief +1693 relegated +1692 releasing +1691 release +1690 relative +1689 relations +1688 relation +1687 related +1686 rejoin +1685 rejected +1684 reintroduce +1683 regulator +1682 regionally +1681 saw +1680 regime +1679 regardless +1678 regarding +1677 regard +1676 refusing +1675 refused +1674 refuse +1673 reform-minded +1672 received +1671 recalled +1670 reassure +1669 ready +1668 readily +1667 trouble +1666 re-emerging +1665 ratios +1664 rationale +1663 rather +1662 ran +1661 rallies +1660 raises +1659 railed +1658 quite +1657 quickly +1656 question +1655 qualify +1654 puts +1653 pushing +1652 pushes +1651 pushed +1650 push +1649 pursue +1648 purged +1647 punishing +1646 pulling +1645 pulled +1644 publicly +1643 proximity +1642 provision +1641 provides +1640 provider +1639 proven +1638 protection +1637 prospect +1636 prompted +1635 problems +1634 prize +1633 pressure +1632 pressing +1631 presidency +1630 premium +1629 pre-crisis +1628 pre-Christmas +1627 potentially +1626 potential +1625 possibility +1624 portrayed +1623 pollero +1622 
poised +1621 points +1620 pointing +1619 pointed +1618 pleasing +1617 played +1616 placing +1615 pitting +1614 persuade +1613 performing +1612 per +1611 payments +1610 attention +1609 paying +1608 frequent +1607 pay +1606 passionless +1605 pain +1604 owing +1603 overturn +1602 overthrow +1601 overnight +1600 overall +1599 periods +1598 backwards +1597 outflank +1596 any +1595 originating +1594 ride +1593 provided +1592 options +1591 oppose +1590 opportunity +1589 operate +1588 remind +1587 training +1586 on-the-job +1585 trial +1584 track +1583 bottom +1582 steps +1581 modest +1580 sexy +1579 record +1578 preferred +1577 occasion +1576 state +1575 endangering +1574 earth +1573 dramatic +1572 sides +1571 board +1570 banks +1569 wealth +1568 par +1567 hot +1566 Bangladesh +1565 India +1564 Qaeda +1563 Al +1562 describe +1561 offshore +1560 offering +1559 offered +1558 wisdom +1557 trace +1556 energy +1555 total +1554 day +1553 credit +1552 capacity +1551 succession +1550 miscalculation +1549 substantial +1548 severely +1547 remaining +1546 reliance +1545 proxy +1544 power +1543 companies +1542 loss +1541 high-level +1540 beyond +1539 corruption +1538 interconnected +1537 Romania +1536 GNP +1535 obey +1534 immigrant +1533 numbers +1532 nowadays +1531 re-condemned +1530 notwithstanding +1529 none +1528 non-combatants +1527 nominate +1526 spring +1525 next +1524 risks +1523 nevertheless +1522 nervous +1521 right +1520 need +1519 necessary +1518 neat +1517 nearly +1516 nationwide +1515 nationally +1514 nation-wide +1513 naming +1512 open +1511 murdered +1510 moving +1509 moved +1508 move +1507 motivated +1506 mostly +1505 most +1504 monitors +1503 missed +1502 mired +1501 ministers +1500 minister +1499 starts +1498 million +1497 migrated +1496 middle +1495 merely +1494 men +1493 promised +1492 memorably +1491 members +1490 measure +1489 may +1488 matched +1487 match +1486 massively +1485 marginalized +1484 size +1483 times +1482 otherwise +1481 makes +1480 progress +1479 
main +1478 loyalty +1477 love +1476 loudly +1475 looms +1474 looks +1473 looking +1472 looked +1471 look +1470 longer +1469 nurtured +1468 lock +1467 loath +1466 lived +1465 live +1464 lip +1463 little +1462 limited +1461 scope +1460 limit +1459 likened +1458 special +1457 lifting +1456 lifted +1455 lift +1454 licencia +1453 leveled +1452 let +1451 lest +1450 lent +1449 length +1448 lend +1447 Maoist +1446 leftist +1445 leaving +1444 least +1443 leak +1442 leads +1441 leading +1440 expect +1439 lead +1438 lay +1437 launched +1436 latter-day +1435 larger +1434 laboring +1433 labor +1432 tend +1431 krill +1430 certainty +1429 known +1428 quiet +1427 preached +1426 ordered +1425 30 +1424 judging +1423 judge +1422 jettisoned +1421 itself +1420 start +1419 worthwhile +1418 provide +1417 took +1416 serves +1415 keeps +1414 spreading +1413 issued +1412 issue +1411 is-in +1410 unfolding +1409 pumping +1408 preoccupied +1407 involving +1406 involves +1405 involved +1404 involve +1403 investors +1402 whatever +1401 sixties +1400 international +1399 service +1398 operation +1397 distress +1396 compliance +1395 TRT +1394 interviewed +1393 accepted +1392 internationally +1391 intended +1390 integration +1389 instrumental +1388 instructed +1387 institutional +1386 inspire +1385 inmates +1384 injuring +1383 initiated +1382 initially +1381 infected +1380 induced +1379 induce +1378 indictment +1377 indicted +1376 indicated +1375 stands +1374 index +1373 spending +1372 increasing +1371 inclusion +1370 includes +1369 included +1368 incited +1367 inadequate +1366 inability +1365 uniform +1364 process +1363 sector +1362 private +1361 opinion +1360 characterize +1359 league +1358 1950 +1357 tandem +1356 supporting +1355 southern +1354 serious +1353 search +1352 reducing +1351 sight +1350 plain +1349 backyard +1348 words +1347 motion +1346 mightily +1345 making +1344 line +1343 laying +1342 launching +1341 keeping +1340 outlook +1339 front +1338 fits +1337 euros +1336 dribs +1335 
considering +1334 collusion +1333 bullishness +1332 banking +1331 Sharif +1330 Science +1329 Iraq +1328 France +1327 providing +1326 impressive +1325 important +1324 assumes +1323 immediately +1322 ignoring +1321 hurt +1320 huge +1319 how +1318 house +1317 hosts +1316 honestly +1315 hitting +1314 historically +1313 mechanisms +1312 automatic +1311 Dutchman +1310 hired +1309 here +1308 nearby +1307 resolve +1306 stabilize +1305 held +1304 heavy +1303 heading +1302 headed +1301 head +1300 supports +1299 left +1298 viewing +1297 receive +1296 yet +1295 happy +1294 thrown +1293 survived +1292 protected +1291 mounted +1290 offer +1289 harming +1288 outpouring +1287 evoke +1286 hardly +1285 harder +1284 hard-core +1283 happening +1282 happen +1281 halted +1280 half +1279 bailed +1278 saved +1277 habits +1276 guidance +1275 growing +1274 growers +1273 grow +1272 granting +1271 granted +1270 government +1269 governance +1268 gone +1267 unmet +1266 giving +1265 movement +1264 genuinely +1263 genuine +1262 general +1261 rise +1260 ground +1259 gas +1258 gained +1257 legal +1256 gain +1255 fully +1254 fueled +1253 pending +1252 frozen +1251 clock +1250 mechanical +1249 reaching +1248 now +1247 failure +1246 heart +1245 getting +1244 friends +1243 frequently +1242 freely +1241 freedom +1240 riding +1239 lunch +1238 lacking +1237 found +1236 form +1235 forgiven +1234 foreigners +1233 forcing +1232 us +1231 top +1230 price +1229 establishment +1228 someone +1227 prime +1226 outrageous +1225 misleading +1224 mere +1223 seemed +1222 poppy +1221 fighting +1220 bearing +1219 liberalization +1218 accelerated +1217 couple +1216 focused +1215 focus +1214 flying +1213 flow +1212 flood +1211 flight +1210 flew +1209 fit +1208 firm +1207 firing +1206 seem +1205 rage +1204 fires +1203 finds +1202 figures +1201 figure +1200 fight +1199 fiddle +1198 fell +1197 easing +1196 feeds +1195 feed +1194 fear +1193 favors +1192 favoring +1191 favor +1190 fast +1189 farmers +1188 surpassed +1187 falls 
+1186 falling +1185 fallen +1184 faith +1183 failed +1182 facing +1181 facilities +1180 faces +1179 eyes +1178 eye +1177 extensive +1176 expropriated +1175 exposed +1174 experience +1173 pick +1172 expected +1171 exists +1170 exhausted +1169 exception +1168 fiber +1167 every +1166 ever +1165 succeed +1164 occur +1163 joining +1162 having +1161 find +1160 eventually +1159 essential +1158 eradicated +1157 era +1156 equal +1155 entrepreneurial +1154 entire +1153 enticing +1152 entering +1151 entered +1150 ensued +1149 enroll +1148 meet +1147 enough +1146 engaging +1145 stimulus +1144 massive +1143 ends +1142 endorsement +1141 odd +1140 encountered +1139 encounter +1138 enabling +1137 en +1136 empowered +1135 employs +1134 atmosphere +1133 emitted +1132 emigration +1131 emerge +1130 embodied +1129 elites +1128 elected +1127 effort +1126 kill +1125 effectively +1124 effect +1123 educated +1122 edifice +1121 economies +1120 economically +1119 ecological +1118 earlier +1117 eager +1116 step +1115 arrive +1114 Malawi +1113 review +1112 due +1111 dubbed +1110 drowning +1109 dropped +1108 force +1107 drift +1106 drew +1105 drawing +1104 feet +1103 dragging +1102 drafted +1101 downward +1100 downgraded +1099 door +1098 stop +1097 done +1096 don +1095 via +1094 doing +1093 disturbed +1092 dismissed +1091 pull +1090 directly +1089 directed +1088 exposure +1087 direct +1086 difficult +1085 different +1084 environmental +1083 technologies +1082 development +1081 developers +1080 develop +1079 devastate +1078 deter +1077 destroying +1076 destined +1075 desperately +1074 designed +1073 deserved +1072 described +1071 deprived +1070 depended +1069 denied +1068 democrats +1067 demanded +1066 delivered +1065 deliver +1064 defiant +1063 defeat +1062 default +1061 value +1060 dollar +1059 decline +1058 declaring +1057 admit +1056 decision +1055 deceive +1054 decade +1053 debate +1052 death +1051 deals +1050 dealing +1049 deal +1048 dead +1047 days +1046 longer-term +1045 daunting +1044 
damaged +1043 damage +1042 culprits +1041 culminated +1040 criticized +1039 critical +1038 space +1037 created +1036 crash +1035 craft +1034 cover +1033 counterparts +1032 counter +1031 influentials +1030 cosmopolitan +1029 convincing +1028 benefits +1027 Europeans +1026 convince +1025 conventional +1024 convened +1023 controls +1022 contributing +1021 contrary +1020 continued +1019 continue +1018 ourselves +1017 construct +1016 prosecuted +1015 conspiracies +1014 considers +1013 connection +1012 connected +1011 confront +1010 confined +1009 committees +1008 conditions +1007 conditional +1006 concluded +1005 dios +1004 con +1003 legacy +1002 complex +1001 competitor +1000 competitively +999 competition +998 commodities +997 committed +996 commitment +995 commentators +994 pass +993 together +992 resembles +991 closely +990 path +989 clear +988 circulated +987 chunk +986 chosen +985 chose +984 choke +983 chase +982 sent +981 working +980 charges +979 changing +978 challenging +977 challenges +976 chain +975 cells +974 celebrated +973 caught +972 cast +971 cases +970 nose +969 cares +968 cards +967 captured +966 capital- +965 comparison +964 cannot +963 candidates +962 came +961 called +960 bypass +959 undermining +958 low-skilled +957 side +956 prompting +955 hook +954 plans +953 following +952 efforts +951 contrast +950 achieving +949 officials +948 Francis +947 market-based +946 decentralized +945 bust +944 severe +943 buckle +942 broken +941 urging +940 broadcasts +939 mind +938 brings +937 bringing +936 bright +935 breaks +934 come +933 bound +932 countries +931 borrowers +930 Kenya +929 boost +928 boils +927 boats +926 blown +925 blowing +924 blow +923 birth +922 binds +921 bill +920 bid +919 bets +918 best +917 herding +916 bent +915 benefiting +914 benchmark +913 belong +912 subjected +911 carried +910 schedule +909 showing +908 begun +907 administration +906 beginning +905 putting +904 began +903 scheduled +902 reduced +901 forced +900 been +899 lever +898 
operational +897 they +896 Turkey +895 became +894 beating +893 beaten +892 beat +891 bearish +890 bear +889 welcomed +888 urged +887 taken +886 swept +885 surprised +884 hand-washing +883 widespread +882 served +881 returned +880 rescued +879 keep +878 relied +877 placed +876 made +875 led +874 killed +873 safe +872 kept +871 jailed +870 inflated +869 going +868 get +867 faced +866 encouraged +865 dumped +864 closed +863 forward +862 based +861 barring +860 barely +859 bare +858 bandits +857 ballots +856 bail +855 news +854 backward +853 backing +852 onto +851 poverty +850 aware +849 awards +848 awarded +847 avoid +846 available +845 attribute +844 attending +843 attend +842 attempts +841 extricate +840 attempt +839 attacks +838 attacking +837 attacked +836 attack +835 attachment +834 unprecedented +833 root +832 reversing +831 odds +830 independence +829 home +828 hand +827 draining +826 dating +825 coping +824 building +823 pace +822 robust +821 cost +820 sites +819 African +818 Southern +817 associated +816 turning +815 assisting +814 assisted +813 assist +812 assailing +811 aspiring +810 asking +809 ask +808 aside +807 recently +806 order +805 possible +804 opposed +803 high +802 matter +801 consequence +800 arrested +799 arms +798 secure +797 taught +796 supposed +795 bubbles +794 paid +793 meant +792 losses +791 incur +790 largely +789 extended +788 devoted +787 currently +786 see +785 certain +784 pursued +783 met +782 being +781 defenders +780 ardent +779 treated +778 appropriately +777 approached +776 approach +775 presidential +774 appointed +773 appetite +772 Peronism +771 1943 +770 appearance +769 appear +768 appealing +767 authority +766 appealed +765 apart +764 anywhere +763 anyone +762 face-to-face +761 another +760 annually +759 soar +758 inflation +757 annual +756 threats +755 tending +754 prison +753 sentenced +752 expel +751 proceeded +750 others +749 fails +748 last +747 check +746 casualties +745 incentive +744 impediment +743 laundry +742 use 
+741 ample +740 amounts +739 amounting +738 among +737 am +736 sells +735 needs +734 already +733 alongside +732 Anglo-American +731 almost +730 complete +729 allowing +728 fail +727 himself +726 further +725 allow +724 allocated +723 cure +722 no +721 harm +720 aligned +719 alienated +718 alienate +717 al +716 airy +715 aiming +714 aiding +713 aid +712 ahead +711 agrees +710 agreeing +709 institutionally +708 agreed +707 agree +706 agencies +705 risk +704 leap +703 boarding +702 afford +701 affecting +700 affected +699 advice +698 advance +697 admittedly +696 adjust +695 address +694 toll +693 addition +692 adding +691 addiction +690 added +689 anything +688 add +687 represent +686 actually +685 goodwill +684 additional +683 engender +682 likelier +681 appears +680 activity +679 activities +678 act +677 borders +676 Afghanistan +675 across +674 scale +673 greater +672 achieve +671 accustomed +670 accused +669 account +668 accordingly +667 accord +666 make +665 Washington +664 Barak +663 accompany +662 accommodates +661 accession +660 access +659 absolved +658 above +657 first +656 about +655 recall +654 shackles +653 melt +652 able +651 set +650 awareness +649 public +648 generate +647 ability +646 abandoned +645 strong +644 source +643 slowdown +642 sharp +641 return +640 explicit +639 an +638 response +637 guide +636 reliable +635 ratio +634 had +633 where +632 number +631 Kosovo +630 mission +629 fact-finding +628 new +627 trusted +626 man +625 addressing +624 coming +623 long +622 livelihood +621 job +620 healthy +619 under +618 floor +617 finger +616 draw +615 course +614 cornerstone +613 concern +612 call +611 close +610 one +609 cutting +608 case +607 Yugoslavia +606 Viktor +605 Thaksin +604 Taliban +603 Supreme +602 Strong +601 States +600 Saudi +599 Russia +598 Royal +597 Peru +596 NLA +595 Mexico +594 Long +593 Jong-il +592 Kim +591 Khamenei +590 Japan +589 Israelis +588 Israel +587 Iran +586 Internet +585 him +584 told +583 conducted +582 Home-Grown 
+581 Help +580 or +579 HIV +578 Grand +577 Gorbachev +576 soldiers +575 genius +574 Free +573 office +572 assumed +571 after +570 shortly +569 Kennedy +568 F. +567 aced +566 F +565 European +564 political +563 Europe-wide +562 Dutch-speaking +561 Dixon +560 ruling +559 uphold +558 must +557 Court +556 Conference +555 literary +554 Genet +553 declared +552 Cocteau +551 Civic +550 stand +549 could +548 slips +547 Britain +546 Bismarck +545 Beginning +544 Bashir +543 Back +542 attended +541 I +540 As +539 survive +538 Arabia +537 went +536 turned +535 America +534 Again +533 Africa +532 parts +531 some +530 AIDS +529 28-year-old +528 2007 +527 1917 +526 years +525 100 +524 15 +523 10 +522 dismiss +521 fact +520 just +519 concerned +518 - +517 written +516 range +515 wide +514 gave +513 who +512 meaning +511 achievement +510 joined +509 go +508 president +507 urge +506 towards +505 paraphrase +504 jump +503 there +502 starting +501 ruined +500 resist +499 increase +498 rapid +497 pursuing +496 plotting +495 perhaps +494 only +493 again +492 once +491 notice +490 moment +489 guard +488 might +487 was +486 released +485 drop +484 plan +483 my +482 instead +481 violation +480 conference +479 press +478 joint +477 administrative +476 real +475 implementing +474 ignored +473 equally +472 divided +471 if +470 helped +469 demise +468 speed +467 asked +466 has +465 given +464 example +463 produce +462 prone +461 markets +460 covering +459 trust +458 can +457 calls +456 buying +455 but +454 glimpse +453 begin +452 befitting +451 surrounding +450 publicity +449 bad +448 were +447 seat +446 allowed +445 her +444 animate +443 banal +442 often +441 incorporating +440 aimed +439 Mubarak +438 highly +437 more +436 ) +435 Saddam +434 ( +433 far +432 US +431 affect +430 big +429 too +428 standing +427 passed +426 abroad +425 near +424 me +423 charged +422 walk +421 't +420 willingness +419 victory +418 19 +417 system +416 security +415 social +414 hit +413 mate +412 running +411 
determined +410 usually +409 Saudis +408 relationship +407 put +406 embrace +405 old +404 holding +403 claim +402 you +401 telling +400 're +399 'a +398 money +397 & +396 he +395 ] +394 % +393 ; +392 45 +391 # +390 workers +389 current +388 when +387 whatsoever +386 what +385 welfare +384 voters +383 upon +382 unlike +381 undermined +380 ultimately +379 turn +378 toward +377 your +376 young +375 work +374 two +373 interests +372 hundreds +371 fore +370 crisis +369 United +368 second-line +367 protect +366 Balkans +365 Western +364 prepare +363 name +362 point +361 media +360 local +359 leave +358 key +357 issues +356 heavily +355 executive +354 chief +353 institutions +352 even +351 each +350 cooperative +349 block +348 blame +347 acute +346 Pakistani +345 levels +344 German +343 French +342 American +341 coalition +340 throwing +339 throw +338 world +337 through +336 within +335 this +334 thinkers +333 themselves +332 advantage +331 take +330 closer +329 something +328 government-induced +327 result +326 poorest +325 performance +324 neighborhood +323 many +322 court +321 combined +320 ambition +319 accommodated +318 will +317 Americans +316 than +315 testing +314 targeting +313 talk +312 tag +311 suspects +310 survivors +309 surveys +308 supporters +307 subservient +306 states +305 spot +304 spent +303 soon +302 small +301 sit +300 then +299 since +298 should +297 shots +296 sets +295 services +294 serve +293 sending +292 seems +291 opt +290 seek +289 screening +288 schoolchildren +287 up +286 round +285 resources +284 tax +283 resorting +282 rescuing +281 required +280 require +279 replaced +278 reminding +277 reminded +276 regulators +275 reached +274 reach +273 our +272 raise +271 poor +270 policymakers +269 play +268 planners +267 place +266 pit +265 over +264 taxpayers +263 ordinary +262 operators +261 ones +260 them +259 part +258 membership +257 market +256 imported +255 basis +254 regular +253 off +252 whom +251 which +250 wasteful +249 40 +248 during 
+247 divide +246 sectarian +245 country +244 means +243 lowered +242 domestic +241 directors +240 Palestine +239 Chinese +238 so +237 nothing +236 much +235 member +234 loyalists +233 mobile +232 link +231 life +230 between +229 field +228 playing +227 level +226 leaves +225 leaders +224 label +223 its +222 would +221 it +220 is +219 investigation +218 lit +217 murders +216 royal +215 hands +214 accepting +213 independent +212 increased +211 income +210 incentives +209 terms +208 these +207 future +206 seeking +205 rallying +204 judgment +203 Pakistan +202 beggar-thy-neighbor +201 immigrants +200 household +199 honor +198 hold +197 past +196 way +195 his +194 higher +193 helps +192 individual +191 help +190 hard +189 guarantee +188 governments +187 globally +186 gives +185 give +184 respond +183 prepared +182 genetically +181 very +180 original +179 mainly +178 corrupt +177 rein +176 free +175 various +174 realm +173 oldest +172 financial +171 such +170 presidents +169 guess +168 correctly +167 outsiders +166 human +165 doctors +164 cell +163 basic +162 Alfonsín +161 followed +160 follow +159 fired +158 have +157 financiers +156 short +155 fall +154 opposition +153 face +152 executed +151 exclusion +150 everyone +149 encouraging +148 empower +147 employee +146 easier +145 driving +144 drawn +143 throats +142 down +141 doves +140 either +139 like +138 not +137 do +136 shambles +135 economic +134 people +133 distract +132 encourage +131 did +130 description +129 denying +128 cut +127 criminals +126 courtesy +125 conversion +124 confronted +123 concerning +122 compound +121 completely +120 other +119 compared +118 commemorating +117 comes +116 classify +115 claims +114 children +113 challenged +112 centers +111 saying +110 by +109 Egyptian +108 demoralized +107 business +106 financial-market +105 burden +104 buck +103 brought +102 retailing +101 downstream +100 branching +99 both +98 blamed +97 billion +96 benefit +95 beneficiaries +94 before +93 linked +92 become +91 
because +90 run +89 be +88 back +87 away +86 care +85 taking +84 same +83 rate +82 end +81 helping +80 age +79 at +78 assets +77 with +76 engage +75 we +74 those +73 early +72 around +71 Arabs +70 Israeli +69 questions +68 are +67 appeal +66 anointed +65 abuse +64 animal +63 four +62 into +61 amount +60 also +59 lines +58 behind +57 along +56 all +55 time +54 air +53 ago +52 panels +51 against +50 year +49 vested +48 protects +47 that +46 law +45 rule +44 backer +43 major +42 a +41 Ireland +40 Greek +39 Germany +38 cattle +37 Fulani +36 China +35 depositors +34 . +33 embolden +32 and +31 -- +30 whereas +29 likely +28 challenge +27 bring +26 from +25 traits +24 feminine +23 " +22 out +21 killers +20 existing +19 agricultural +18 well +17 interest +16 ' +15 $ +14 as +13 on +12 of +11 their +10 the +9 to +8 for +7 's +6 politicians +5 in +4 transatlantic +3 NATO +2 stock +1 , +0 Atlantic diff --git a/mosesdecoder/contrib/rt/Empty.c b/mosesdecoder/contrib/rt/Empty.c new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mosesdecoder/contrib/rt/README b/mosesdecoder/contrib/rt/README new file mode 100644 index 0000000000000000000000000000000000000000..d7a4cfebc0c56bd3a15ee8364dd98d9fc9c3643f --- /dev/null +++ b/mosesdecoder/contrib/rt/README @@ -0,0 +1,9 @@ +FOR OSX ONLY +------------ + +This creates an empty library file + librt.a +It should be used when you are compile with Eclipse on OSX. + +The Eclipse projects are set up to link to librt but OSX doesn't have it so this just creates a dummy library. 
+ diff --git a/mosesdecoder/contrib/rt/compile.sh b/mosesdecoder/contrib/rt/compile.sh new file mode 100644 index 0000000000000000000000000000000000000000..6266d58d674024c34507490f18dbf39039e394d5 --- /dev/null +++ b/mosesdecoder/contrib/rt/compile.sh @@ -0,0 +1,2 @@ +gcc -c Empty.c -o Empty.o +ar rcs librt.a Empty.o \ No newline at end of file diff --git a/mosesdecoder/contrib/sigtest-filter/README.txt b/mosesdecoder/contrib/sigtest-filter/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..819fc56433c2700a8bf8379ad60eae8369a08b7c --- /dev/null +++ b/mosesdecoder/contrib/sigtest-filter/README.txt @@ -0,0 +1,47 @@ +Re-implementation of Johnson et al. (2007)'s phrasetable filtering strategy. + +This implementation relies on Joy Zhang's SALM Suffix Array toolkit. It is +available here: + + http://projectile.sv.cmu.edu/research/public/tools/salm/salm.htm + +--Chris Dyer + +BUILD INSTRUCTIONS +--------------------------------- + +1. Download and build SALM. + +2. If your boost path is in a non-standard directory: + export CPLUS_INCLUDE_PATH=/path/to/boost/include + export LIBRARY_PATH=/path/to/boost/lib64 + +3. make SALMDIR=/path/to/SALM + + + +USAGE INSTRUCTIONS +--------------------------------- + +1. Using the SALM/Bin/Linux/Index/IndexSA.O32, create a suffix array index + of the source and target sides of your training bitext. + +2. cat phrase-table.txt | ./filter-pt -e TARG.suffix -f SOURCE.suffix \ + -l + + FILTER-VALUE is the -log prob threshold described in Johnson et al. + (2007)'s paper. It may be either 'a+e', 'a-e', or a positive real + value. 'a+e' is a good setting- it filters out <1,1,1> phrase pairs. + I also recommend using -n 30, which filteres out all but the top + 30 phrase pairs, sorted by P(e|f). This was used in the paper. + +3. Run with no options to see more use-cases. + + +REFERENCES +--------------------------------- + +H. Johnson, J. Martin, G. Foster and R. Kuhn. 
(2007) Improving Translation + Quality by Discarding Most of the Phrasetable. In Proceedings of the 2007 + Joint Conference on Empirical Methods in Natural Language Processing and + Computational Natural Language Learning (EMNLP-CoNLL), pp. 967-975. diff --git a/mosesdecoder/contrib/sigtest-filter/check-install b/mosesdecoder/contrib/sigtest-filter/check-install new file mode 100644 index 0000000000000000000000000000000000000000..ba4f431e0076b3e4d0cbcb335ee19e0509d52d58 --- /dev/null +++ b/mosesdecoder/contrib/sigtest-filter/check-install @@ -0,0 +1,5 @@ +#!/usr/bin/perl -w +use strict; +my $path = shift @ARGV; +die "Can't find SALM installation path: $path\nPlease use:\n\n make SALMDIR=/path/to/SALM\n\n" unless (-d $path); +exit 0; diff --git a/mosesdecoder/contrib/sigtest-filter/sigtest-filter.vcproj b/mosesdecoder/contrib/sigtest-filter/sigtest-filter.vcproj new file mode 100644 index 0000000000000000000000000000000000000000..a961ac61dd2c9e617365995b0bc1bc2ec01a1c2e --- /dev/null +++ b/mosesdecoder/contrib/sigtest-filter/sigtest-filter.vcproj @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-cpt.h b/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-cpt.h new file mode 100644 index 0000000000000000000000000000000000000000..dbfb947e31fb418be195c1b797e8c1b20ef05c62 --- /dev/null +++ b/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-cpt.h @@ -0,0 +1,1332 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. 
// +// // +// ModelBlocks is free software: you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. // +// // +// ModelBlocks is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with ModelBlocks. If not, see . // +// // +// ModelBlocks developers designate this particular file as subject to // +// the "Moses" exception as provided by ModelBlocks developers in // +// the LICENSE file that accompanies this code. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _NL_CPT__ +#define _NL_CPT__ + +//#include +//#include +//#include +//#include +//using namespace std; +//#include "nl-string.h" +//#include "nl-safeids.h" +//#include "nl-stringindex.h" +#include "nl-randvar.h" +//#include "nl-probmodel.h" +#include "nl-hash.h" +//#include +//using namespace tr1; + +#include + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// psNil +// +//////////////////////////////////////////////////////////////////////////////// + +char psNil[] = ""; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// Unit +// +//////////////////////////////////////////////////////////////////////////////// + +class Unit { + public: + void write(FILE* pf)const{} + size_t getHashKey ( ) const { return 0; } + bool operator== ( const Unit& u ) const { return true; } + 
bool operator< ( const Unit& u ) const { return false; } + friend ostream& operator<< ( ostream& os, const Unit& u ) { return os; } + friend String& operator<< ( String& str, const Unit& u ) { return str; } + friend IStream operator>> ( pair si_m, const char* psD ) { return si_m.first; } + + // OBSOLETE! + friend pair operator>> ( StringInput si, Unit& m ) { return pair(si,&m); } + friend StringInput operator>> ( pair si_m, const char* psD ) { return si_m.first; } +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// MapKeyXD +// +//////////////////////////////////////////////////////////////////////////////// + +//////////////////// +template +class MapKey1D { + private: + X1 x1; + public: + const X1& getX1() const { return x1; } + size_t getHashKey ( ) const { return x1.getHashKey(); } + MapKey1D ( ) { } + MapKey1D ( const X1& a1 ) { x1=a1; } + bool operator== ( const MapKey1D& h ) const { return(x1==h.x1); } + bool operator< ( const MapKey1D& h ) const { return(x1< h.x1); } + friend ostream& operator<< ( ostream& os, const MapKey1D& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + //MapKey1D& m = *is_m.second; + return is_m.first>>is_m.second->x1>>psD; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, MapKey1D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey1D& m = *si_m.second; + return si_m.first>>m.x1>>psD; + } +}; + +//////////////////// +template +class MapKey2D { + private: + X1 x1; + X2 x2; + public: + const X1& getX1() const { return x1; } + const X2& getX2() const { return x2; } + size_t getHashKey ( ) const { size_t k=rotLeft(x1.getHashKey(),3); k^=x2.getHashKey(); return k; } + MapKey2D ( ) { } + MapKey2D ( const X1& a1, const X2& a2 ) { x1=a1; x2=a2; } + bool operator== ( const MapKey2D& h ) const { return(x1==h.x1 && x2==h.x2); } + bool operator< ( const MapKey2D& h ) const { + return ( (x1& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + MapKey2D& m = *is_m.second; IStream is2; + IStream is=is_m.first>>m.x1>>" "; + while((is2=is>>" ")!=IStream())is=is2; + return is>>m.x2>>psD; + } + + // OBSOLETE! + friend pair*> operator>> ( StringInput si, MapKey2D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey2D& m = *si_m.second; StringInput si2; + StringInput si=si_m.first>>m.x1>>" "; + while((si2=si>>" ")!=NULL)si=si2; + return si>>m.x2>>psD; + } +}; + +//////////////////// +template +class MapKey3D { + private: + X1 x1; + X2 x2; + X3 x3; + public: + const X1& getX1() const { return x1; } + const X2& getX2() const { return x2; } + const X3& getX3() const { return x3; } + size_t getHashKey ( ) const { size_t k=rotLeft(x1.getHashKey(),3); k=rotLeft(k^x2.getHashKey(),3); k^=x3.getHashKey(); return k; } + MapKey3D ( ) { } + MapKey3D ( const X1& a1, const X2& a2, const X3& a3 ) { x1=a1; x2=a2; x3=a3; } + bool operator== ( const MapKey3D& h ) const { return(x1==h.x1 && x2==h.x2 && x3==h.x3); } + bool operator< ( const MapKey3D& h ) const { + return ( (x1& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + MapKey3D& m = *is_m.second; IStream is2; + 
IStream is=is_m.first>>m.x1>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x2>>" "; + while((is2=is>>" ")!=IStream())is=is2; + return is>>m.x3>>psD; + } + + // OBSOLETE! + friend pair*> operator>> ( StringInput si, MapKey3D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey3D& m = *si_m.second; StringInput si2; + StringInput si=si_m.first>>m.x1>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x2>>" "; + while((si2=si>>" ")!=NULL)si=si2; + return si>>m.x3>>psD; + } +}; + +//////////////////// +template +class MapKey4D { + private: + X1 x1; + X2 x2; + X3 x3; + X4 x4; + public: + const X1& getX1() const { return x1; } + const X2& getX2() const { return x2; } + const X3& getX3() const { return x3; } + const X4& getX4() const { return x4; } + size_t getHashKey ( ) const { size_t k=rotLeft(x1.getHashKey(),3); k=rotLeft(k^x2.getHashKey(),3); k=rotLeft(k^x3.getHashKey(),3); k^=x4.getHashKey(); return k; } + MapKey4D ( ) { } + MapKey4D ( const X1& a1, const X2& a2, const X3& a3, const X4& a4 ) { x1=a1; x2=a2; x3=a3; x4=a4; } + bool operator== ( const MapKey4D& h ) const { return(x1==h.x1 && x2==h.x2 && x3==h.x3 && x4==h.x4); } + bool operator< ( const MapKey4D& h ) const { + return ( (x1& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + MapKey4D& m = *is_m.second; IStream is2; + IStream is=is_m.first>>m.x1>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x2>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x3>>" "; + while((is2=is>>" ")!=IStream())is=is2; + return is>>m.x4>>psD; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, MapKey4D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey4D& m = *si_m.second; StringInput si2; + StringInput si=si_m.first>>m.x1>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x2>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x3>>" "; + while((si2=si>>" ")!=NULL)si=si2; + return si>>m.x4>>psD; + } +}; + +//////////////////// +template +class MapKey5D { + private: + X1 x1; + X2 x2; + X3 x3; + X4 x4; + X5 x5; + public: + const X1& getX1() const { return x1; } + const X2& getX2() const { return x2; } + const X3& getX3() const { return x3; } + const X4& getX4() const { return x4; } + const X5& getX5() const { return x5; } + size_t getHashKey ( ) const { size_t k=rotLeft(x1.getHashKey(),3); k=rotLeft(k^x2.getHashKey(),3); + k=rotLeft(k^x3.getHashKey(),3); k=rotLeft(k^x4.getHashKey(),3); k^=x5.getHashKey(); return k; } + MapKey5D ( ) { } + MapKey5D ( const X1& a1, const X2& a2, const X3& a3, const X4& a4, const X5& a5 ) { x1=a1; x2=a2; x3=a3; x4=a4; x5=a5; } + bool operator== ( const MapKey5D& h ) const { return(x1==h.x1 && x2==h.x2 && x3==h.x3 && x4==h.x4 && x5==h.x5); } + bool operator< ( const MapKey5D& h ) const { + return ( (x1& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + MapKey5D& m = *is_m.second; IStream is2; + IStream is=is_m.first>>m.x1>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x2>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x3>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x4>>" "; + while((is2=is>>" ")!=IStream())is=is2; + return is>>m.x5>>psD; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, MapKey5D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey5D& m = *si_m.second; StringInput si2; + StringInput si=si_m.first>>m.x1>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x2>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x3>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x4>>" "; + while((si2=si>>" ")!=NULL)si=si2; + return si>>m.x5>>psD; + } +}; + +//////////////////// +template +class MapKey6D { + private: + X1 x1; + X2 x2; + X3 x3; + X4 x4; + X5 x5; + X6 x6; + public: + const X1& getX1() const { return x1; } + const X2& getX2() const { return x2; } + const X3& getX3() const { return x3; } + const X4& getX4() const { return x4; } + const X5& getX5() const { return x5; } + const X5& getX6() const { return x6; } + size_t getHashKey ( ) const { size_t k=rotLeft(x1.getHashKey(),3); k=rotLeft(k^x2.getHashKey(),3); + k=rotLeft(k^x3.getHashKey(),3); k=rotLeft(k^x4.getHashKey(),3); k=rotLeft(k^x5.getHashKey(),3); k^=x6.getHashKey(); return k; } + MapKey6D ( ) { } + MapKey6D ( const X1& a1, const X2& a2, const X3& a3, const X4& a4, const X5& a5, const X6& a6 ) { x1=a1; x2=a2; x3=a3; x4=a4; x5=a5; x6=a6; } + bool operator== ( const MapKey6D& h ) const { return(x1==h.x1 && x2==h.x2 && x3==h.x3 && x4==h.x4 && x5==h.x5 && x6==h.x6); } + bool operator< ( const MapKey6D& h ) const { + return ( (x1& k ) { return os<& k ) { return str<> ( pair*> is_m, const char* psD ) { + MapKey6D& m = *is_m.second; IStream is2; + IStream is=is_m.first>>m.x1>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x2>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x3>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x4>>" "; + while((is2=is>>" ")!=IStream())is=is2; + is=is>>m.x5>>" "; + while((is2=is>>" ")!=IStream())is=is2; + return is>>m.x6>>psD; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, MapKey6D& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + MapKey6D& m = *si_m.second; StringInput si2; + StringInput si=si_m.first>>m.x1>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x2>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x3>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x4>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>m.x5>>" "; + while((si2=si>>" ")!=NULL)si=si2; + return si>>m.x6>>psD; + } +}; + +// Declare random access conditional probability tables (un-iteratable) +#include "nl-racpt.h" + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// +// Multimap CPTXDModel +// +//////////////////////////////////////////////////////////////////////////////// + +/* +template +class BaseIterVal : public V { + public: + static const int NUM_ITERS = 1; + II iter; + BaseIterVal ( ) : V(), iter(typename II::first_type(0,0),typename II::second_type(0,0)) { } + BaseIterVal ( II ii ) : V((ii.first!=ii.second)?ii.first->first:V()), iter(ii) { } + BaseIterVal ( V v ) : V(v), iter(typename II::first_type(0,0),typename II::second_type(0,0)) { } + BaseIterVal& operator++ ( int ) { V& v=*this; iter.first++; if(iter.first!=iter.second)v=iter.first->first; return *this; } +}; +*/ + +//////////////////////////////////////////////////////////////////////////////// + + +template +class SimpleMap : public map { + private: + typedef map OrigMap; + static const Y yDummy; + + public: + // Constructor / destructor methods... + SimpleMap ( ) : OrigMap() { } + SimpleMap ( int i ) : OrigMap() { } + SimpleMap (const SimpleMap& s) : OrigMap(s) { } + // Specification methods... + Y& set ( const X& x ) { return OrigMap::operator[](x); } + // Extraction methods... + const Y& get ( const X& x ) const { return (OrigMap::end()!=OrigMap::find(x)) ? 
OrigMap::find(x)->second : yDummy; } + bool contains ( const X& x ) const { return (OrigMap::end()!=OrigMap::find(x)); } + friend ostream& operator<< ( ostream& os, const SimpleMap& h ) { + for ( typename SimpleMap::const_iterator it=h.begin(); it!=h.end(); it++ ) + os<<((it==h.begin())?"":",")<first<<":"<second; + return os; + } +}; +template const Y SimpleMap::yDummy = Y(); + + +//////////////////////////////////////////////////////////////////////////////// + +#define MAP_CONTAINER SimpleMap +//#define MAP_CONTAINER SimpleHash + +template +class GenericCPTModel : public MAP_CONTAINER > { + private: + typedef MAP_CONTAINER > HKYP; + typedef typename MAP_CONTAINER::const_iterator IYP; + //HKYP h; + + public: + typedef Y RandVarType; + typedef Y RVType; +/* typedef BaseIterVal,Y> IterVal; */ + typedef MAP_CONTAINER distribution; + +/* bool setFirst ( IterVal& ikyp, const K& k ) const { */ +/* const MAP_CONTAINER& hyp = MAP_CONTAINER >::get(k); */ +/* ikyp = std::pair(hyp.begin(),hyp.end()); */ +/* return ( ikyp.iter.first != ikyp.iter.second ); */ +/* } */ +/* bool setNext ( IterVal& ikyp, const K& k ) const { */ +/* if ( ikyp.iter.first != ikyp.iter.second ) ikyp++; */ +/* return ( ikyp.iter.first != ikyp.iter.second ); */ +/* } */ + bool contains ( const Y& y, const K& k ) const { + return ( MAP_CONTAINER >::get(k).contains(y) ); + } + bool contains ( const K& k ) const { + return ( MAP_CONTAINER >::contains(k) ); + } +/* P getProb ( const IterVal& ikyp, const K& k ) const { */ +/* if ( ikyp.iter.first == ikyp.iter.second ) { cerr<<"ERROR: no iterator to fix probability: "<second ); */ +/* } */ + P getProb ( const Y& y, const K& k ) const { + return MAP_CONTAINER >::get(k).get(y); + } + const MAP_CONTAINER& getDist ( const K& k ) const { + return MAP_CONTAINER >::get(k); + } + P& setProb ( const Y& y, const K& k ) { + return MAP_CONTAINER >::set(k).set(y); + } + void normalize ( ) { + for ( typename HKYP::iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) { + K 
k=ik->first; + P p=P(); + for ( typename distribution::iterator itd = ik->second.begin(); itd != ik->second.end(); ++itd ) + p += itd->second; + if (p!=P()) + for ( typename distribution::iterator itd = ik->second.begin(); itd != ik->second.end(); ++itd ) + itd->second /= p; + } + } +/* void transmit ( int tSockfd, const char* psId ) const { */ +/* for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) { */ +/* K k=ik->first; */ +/* IterVal y; */ +/* // For each non-zero probability in model... */ +/* for ( bool b=setFirst(y,k); b; b=setNext(y,k) ) { */ +/* //if ( getProb(y,k) != P() ) { */ +/* String str(1000); */ +/* str<first; + const distribution& dist = ik->second; + for ( typename distribution::const_iterator itd = dist.begin(); itd != dist.end(); ++itd ) { + const Y& y = itd->first; + os<& m ) { */ +/* for ( typename HKYP::const_iterator ik=m.HKYP::begin(); ik!=m.HKYP::end(); ik++ ) { */ +/* K k=ik->first; */ +/* IterVal y; */ +/* for ( bool b=m.setFirst(y,k); b; b=m.setNext(y,k) ) */ +/* setProb(y,k) = m.getProb(y,k); */ +/* } */ +/* } */ + void clear ( ) { MAP_CONTAINER >::clear(); } + + //friend pair*> operator>> ( IStream is, GenericCPTModel& m ) { + // return pair*>(is,&m); } + friend IStream operator>> ( pair*> is_m, const char* psD ) { + Y y; K k; IStream is,is1; GenericCPTModel& m = *is_m.second; + is=is_m.first; + if ( is==IStream() ) return is; + is=is>>psLbl; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading k...\n"; + is=is>>k>>" "; + ////cerr<<" ...k='"<>" ")!=IStream())is=is1; + is=is>>": "; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading y...\n"; + is=is>>y>>" "; + ////cerr<<" ...y='"<>" ")!=IStream())is=is1; + is=is>>"= "; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading pr...\n"; + return (is!=IStream()) ? is>>m.setProb(y,k)>>psD : is; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, GenericCPTModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + Y y; K k; StringInput si,si2; GenericCPTModel& m = *si_m.second; + si=si_m.first; + if ( si==NULL ) return si; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>k>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>": "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>y>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>"= "; + while((si2=si>>" ")!=NULL)si=si2; + return (si!=NULL) ? si>>m.setProb(y,k)>>psD : si; + } +}; + + +//////////////////////////////////////////////////////////////////////////////// + +//////////////////// +template +class CPT1DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT1DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal IterVal; */ + +/* bool setFirst ( IterVal& ixyp ) const { */ +/* return GenericCPTModel,P>::setFirst ( ixyp, MapKey1D(Unit()) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, MapKey1D(Unit()) ); */ +/* } */ + bool contains ( const Y& y ) const { + return GenericCPTModel,P,psLbl>::contains ( y, MapKey1D(Unit()) ); + } +/* P getProb ( const IterVal& ixyp ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey1D(Unit()) ); */ +/* } */ + P getProb ( const Y& y ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey1D(Unit()) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey1D(Unit()) ); + } + P& setProb ( const Y& y ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey1D(Unit()) ); + } + bool readFields ( Array& aps ) { + if ( 3==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[1]), MapKey1D(Unit()) ) = atof(aps[2]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return 
pair(is,&m); } +}; + + +//////////////////// +template +class CPT2DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT2DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal IterVal; */ + +/* // This stuff only for deterministic 'Determ' models... */ +/* typedef X1 Dep1Type; */ +/* typedef P ProbType; */ +/* bool hasDeterm ( const X1& x1 ) { IterVal y; bool b=setFirst(y,x1); return b; } */ +/* Y getDeterm ( const X1& x1 ) { IterVal y; bool b=setFirst(y,x1); if(!b)cerr<<"ERROR: determ case missing: "<,P>::setFirst ( ixyp, MapKey1D(x1) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp, const X1& x1 ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, MapKey1D(x1) ); */ +/* } */ + bool contains ( const Y& y, const X1& x1 ) const { + return GenericCPTModel,P,psLbl>::contains ( y, MapKey1D(x1) ); + } + bool contains ( const X1& x1 ) const { + return GenericCPTModel,P,psLbl>::contains ( MapKey1D(x1) ); + } +/* P getProb ( const IterVal& ixyp, const X1& x1 ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey1D(x1) ); */ +/* } */ + P getProb ( const Y& y, const X1& x1 ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey1D(x1) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( const X1& x1 ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey1D(x1) ); + } + P& setProb ( const Y& y, const X1& x1 ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey1D(x1) ); + } + bool readFields ( Array& aps ) { + if ( 4==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[2]), MapKey1D(aps[1]) ) = atof(aps[3]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + +//////////////////// +template +class CPT3DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT3DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal 
IterVal; */ + +/* bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2 ) const { */ +/* return GenericCPTModel,P>::setFirst ( ixyp, MapKey2D(x1,x2) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2 ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, MapKey2D(x1,x2) ); */ +/* } */ + bool contains ( const Y& y, const X1& x1, const X2& x2 ) const { + return GenericCPTModel,P,psLbl>::contains ( y, MapKey2D(x1,x2) ); + } + bool contains ( const X1& x1, const X2& x2 ) const { + return GenericCPTModel,P,psLbl>::contains ( MapKey2D(x1,x2) ); + } +/* P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2 ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey2D(x1,x2) ); */ +/* } */ + P getProb ( const Y& y, const X1& x1, const X2& x2 ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey2D(x1,x2) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( const X1& x1, const X2& x2 ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey2D(x1,x2) ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2 ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey2D(x1,x2) ); + } + bool readFields ( Array& aps ) { + if ( 5==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[3]), MapKey2D(aps[1],aps[2]) ) = atof(aps[4]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + +//////////////////// +template +class CPT4DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT4DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal IterVal; */ + +/* bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const { */ +/* return GenericCPTModel,P>::setFirst ( ixyp, MapKey3D(x1,x2,x3) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, 
MapKey3D(x1,x2,x3) ); */ +/* } */ + bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericCPTModel,P,psLbl>::contains ( y, MapKey3D(x1,x2,x3) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericCPTModel,P,psLbl>::contains ( MapKey3D(x1,x2,x3) ); + } +/* P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey3D(x1,x2,x3) ); */ +/* } */ + P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey3D(x1,x2,x3) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey3D(x1,x2,x3) ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey3D(x1,x2,x3) ); + } + bool readFields ( Array& aps ) { + if ( 6==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[4]), MapKey3D(aps[1],aps[2],aps[3]) ) = atof(aps[5]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + +//////////////////// +template +class CPT5DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT5DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal IterVal; */ + +/* bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { */ +/* return GenericCPTModel,P>::setFirst ( ixyp, MapKey4D(x1,x2,x3,x4) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, MapKey4D(x1,x2,x3,x4) ); */ +/* } */ + bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return 
GenericCPTModel,P,psLbl>::contains ( y, MapKey4D(x1,x2,x3,x4) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericCPTModel,P,psLbl>::contains ( MapKey4D(x1,x2,x3,x4) ); + } +/* P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey4D(x1,x2,x3,x4) ); */ +/* } */ + P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey4D(x1,x2,x3,x4) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey4D(x1,x2,x3,x4) ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey4D(x1,x2,x3,x4) ); + } + bool readFields ( Array& aps ) { + if ( 7==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[5]), MapKey4D(aps[1],aps[2],aps[3],aps[4]) ) = atof(aps[6]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + +//////////////////// +template +class CPT6DModel : public GenericCPTModel,P,psLbl> { + typedef GenericCPTModel,P,psLbl> ParentType; + typedef CPT6DModel ThisType; + public: +/* typedef typename GenericCPTModel,P>::IterVal IterVal; */ + +/* bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { */ +/* return GenericCPTModel,P>::setFirst ( ixyp, MapKey5D(x1,x2,x3,x4,x5) ); */ +/* } */ +/* bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { */ +/* return GenericCPTModel,P>::setNext ( ixyp, MapKey5D(x1,x2,x3,x4,x5) ); */ +/* } */ + bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + 
return GenericCPTModel,P,psLbl>::contains ( y, MapKey5D(x1,x2,x3,x4,x5) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericCPTModel,P,psLbl>::contains ( MapKey5D(x1,x2,x3,x4,x5) ); + } +/* P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { */ +/* return GenericCPTModel,P>::getProb ( ixyp, MapKey5D(x1,x2,x3,x4,x5) ); */ +/* } */ + P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericCPTModel,P,psLbl>::getProb ( y, MapKey5D(x1,x2,x3,x4,x5) ); + } + const typename GenericCPTModel,P,psLbl>::distribution& getDist ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericCPTModel,P,psLbl>::get ( MapKey5D(x1,x2,x3,x4,x5) ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) { + return GenericCPTModel,P,psLbl>::setProb ( y, MapKey5D(x1,x2,x3,x4,x5) ); + } + bool readFields ( Array& aps ) { + if ( 8==aps.size() ) { + GenericCPTModel,P,psLbl>::setProb ( Y(aps[6]), MapKey5D(aps[1],aps[2],aps[3],aps[4],aps[5]) ) = atof(aps[7]); + return true; + } + return false; + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + + + + + + + + + + + + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +/* +template +class HidVarBaseIterVal : public pair > >,Id > { + public: + typedef Y VAR; + static const int NUM_ITERS = 1; + operator Y ( ) const { return pair > >,Id >::first.getRef().get(pair > >,Id >::second.toInt()).first; } 
+}; +*/ + +template +//class GenericHidVarCPTModel : public SimpleHash > > { +class GenericHidVarCPTModel : public SimpleHash > { + private: + //typedef SimpleHash > HKYP; + //typedef SimpleHash > > HKYP; + typedef SimpleHash > HKYP; + typedef int IYP; + ////HKYP h; + + public: + typedef Y RandVarType; + typedef Y RVType; + typedef typename Y::template ArrayIterator

IterVal; + + typename HKYP::const_iterator begin ( ) const { return HKYP::begin(); } + typename HKYP::iterator begin ( ) { return HKYP::begin(); } + typename Y::template ArrayIterator

begin ( const K& k ) const { + typename Y::template ArrayIterator

iyp; iyp.first = HKYP::get(k); iyp.second = 0; return iyp; + } + P setIterProb ( typename Y::template ArrayIterator

& iyp, const K& k, int& vctr ) const { + P pr; + // If fork happens before this var, set val to first in distrib and return prob=1.0... + if ( vctr < 0 ) { + iyp.first = HKYP::get(k); + iyp.second = 0; + pr = ( iyp.first.getRef().get(0).second != P() ) ? P(1.0) : P(); + if (pr == P()){ + cerr << "ERROR: Some condition has no value! Key = '" << k << "' vctr=" << vctr << "\n"; + } + //return P(1.0); + } + // If fork happens at this var, set val to next in distrib and return prob of val (prob=0.0 if nothing is next)... + // NOTE: falling off the end of a distribution is not a model error; only failing to start a new distribution! + else if ( vctr == 0 ) { + if (iyp.second+1& iyp, const K& k ) const { + return iyp.first.getRef().get(iyp.second.toInt()).second; + } + const typename Y::template ArrayDistrib

& getDistrib ( const K& k ) const { + return HKYP::get(k); + } + + P& setProb ( const Y& y, const K& k ) { + pair& yp = HKYP::set(k).add(); + yp.first = y; + return yp.second; + } + typename Y::template ArrayDistrib

& setDistrib ( const K& k ) { + return HKYP::set(k); + } + void normalize ( ) { + // NOTE: BEAR IN MIND, LOGPROBS CAN'T BE ADDED!!!!! + for ( typename HKYP::iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) { + P prTot; + for ( unsigned int i=0; isecond.size(); i++ ) { + prTot += ik->second.get(i).second; + } + for ( unsigned int i=0; isecond.size(); i++ ) { + ik->second.set(i).second /= prTot; + } + } + } + void dump ( ostream& os, const char* psId ) const { + for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) { + K k=ik->first; + //IterVal y; + //for ( bool b=setFirst(y,k); b; b=setNext(y,k) ) + for ( unsigned int i=0; isecond.size(); i++ ) { + const pair& yp = ik->second.get(i); + os<*> operator>> ( IStream is, GenericHidVarCPTModel& m ) { +// return pair*>(is,&m); } + friend IStream operator>> ( pair*> is_m, const char* psD ) { + Y y; K k; IStream is,is1; GenericHidVarCPTModel& m = *is_m.second; + is=is_m.first; + if ( is==IStream() ) return is; + is=is>>psLbl; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading k...\n"; + is=is>>k>>" "; + ////cerr<<" ...k='"<>" ")!=IStream())is=is1; + is=is>>": "; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading y...\n"; + is=is>>y>>" "; + ////cerr<<" ...y='"<>" ")!=IStream())is=is1; + is=is>>"= "; + while((is1=is>>" ")!=IStream())is=is1; + ////cerr<<"reading pr...\n"; + return (is!=IStream()) ? is>>m.setProb(y,k)>>psD : is; + } + + // OBSOLETE! 
+ friend pair*> operator>> ( StringInput si, GenericHidVarCPTModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + Y y; K k; StringInput si,si2; GenericHidVarCPTModel& m = *si_m.second; + si=si_m.first; + if ( si==NULL ) return si; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>k>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>": "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>y>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>"= "; + while((si2=si>>" ")!=NULL)si=si2; + return (si!=NULL) ? si>>m.setProb(y,k)>>psD : si; + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +//////////////////// +template +class HidVarCPT1DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT1DModel ThisType; + public: + HidVarCPT1DModel ( ) { } + HidVarCPT1DModel ( const Y& y ) { setProb(y)=P(1.0); } + P setIterProb ( typename Y::template ArrayIterator

& iyp, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey1D(), vctr ); + } + P& setProb ( const Y& y ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey1D() ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey1D() ); + } + const typename Y::template ArrayDistrib

& getDistrib ( ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey1D() ); + } + bool contains ( ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey1D()); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT2DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT2DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey1D(x1), vctr ); + } + P& setProb ( const Y& y, const X1& x1 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey1D(x1) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey1D(x1) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey1D(x1) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey1D(x1) ); + } + bool contains ( const X1& x1 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey1D(x1)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT3DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT3DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey2D(x1,x2), vctr ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey2D(x1,x2) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1, const X2& x2 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey2D(x1,x2) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2 ) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey2D(x1,x2) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1, const X2& x2 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey2D(x1,x2) ); + } + bool contains ( const X1& x1, const X2& x2 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey2D(x1,x2)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT4DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT4DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey3D(x1,x2,x3), vctr ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey3D(x1,x2,x3) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1, const X2& x2, const X3& x3 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey3D(x1,x2,x3) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey3D(x1,x2,x3) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey3D(x1,x2,x3) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey3D(x1,x2,x3)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT5DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT5DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey4D(x1,x2,x3,x4), vctr ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey4D(x1,x2,x3,x4) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey4D(x1,x2,x3,x4) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey4D(x1,x2,x3,x4) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey4D(x1,x2,x3,x4) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey4D(x1,x2,x3,x4)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT6DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT6DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey5D(x1,x2,x3,x4,x5), vctr ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey5D(x1,x2,x3,x4,x5) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey5D(x1,x2,x3,x4,x5) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey5D(x1,x2,x3,x4,x5) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey5D(x1,x2,x3,x4,x5) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey5D(x1,x2,x3,x4,x5)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + +//////////////////// +template +class HidVarCPT7DModel : public GenericHidVarCPTModel,P,psLbl> { + typedef GenericHidVarCPTModel,P,psLbl> ParentType; + typedef HidVarCPT7DModel ThisType; + public: + P setIterProb ( typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6, int& vctr ) const { + return GenericHidVarCPTModel,P,psLbl>::setIterProb ( iyp, MapKey6D(x1,x2,x3,x4,x5,x6), vctr ); + } + P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6 ) { + return GenericHidVarCPTModel,P,psLbl>::setProb ( y, MapKey6D(x1,x2,x3,x4,x5,x6) ); + } + typename Y::template ArrayDistrib

& setDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6 ) { + return GenericHidVarCPTModel,P,psLbl>::setDistrib ( MapKey6D(x1,x2,x3,x4,x5,x6) ); + } + P getProb ( const typename Y::template ArrayIterator

& iyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6 ) const { + return GenericHidVarCPTModel,P,psLbl>::getProb ( iyp, MapKey6D(x1,x2,x3,x4,x5,x6) ); + } + const typename Y::template ArrayDistrib

& getDistrib ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6 ) const { + return GenericHidVarCPTModel,P,psLbl>::getDistrib ( MapKey6D(x1,x2,x3,x4,x5,x6) ); + } + bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5, const X6& x6 ) const { + return GenericHidVarCPTModel,P,psLbl>::contains(MapKey6D(x1,x2,x3,x4,x5,x6)); + } + friend pair operator>> ( IStream is, ThisType& m ) { return pair(is,&m); } +}; + + + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +template +class SingleFactoredModel { + private: + M1 m1; + public: + const M1& getM1 ( ) const { return m1; } + M1& setM1 ( ) { return m1; } + void subsume ( SingleFactoredModel& fm ) { m1.subsume(fm.m1); } + void clear ( ) { m1.clear(); } + bool readFields ( Array& aps ) { return ( m1.readFields(aps) ); } + friend pair*> operator>> ( StringInput si, SingleFactoredModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + return si_m.first>>si_m.second->m1>>psD; } +}; + +template +class DoubleFactoredModel { + private: + M1 m1; M2 m2; + public: + const M1& getM1 ( ) const { return m1; } + const M2& getM2 ( ) const { return m2; } + M1& setM1 ( ) { return m1; } + M2& setM2 ( ) { return m2; } + void subsume ( DoubleFactoredModel& fm ) { m1.subsume(fm.m1); m2.subsume(fm.m2); } + void clear ( ) { m1.clear(); m2.clear(); } + bool readFields ( Array& aps ) { return ( m1.readFields(aps) || m2.readFields(aps) ); } + friend pair*> operator>> ( StringInput si, DoubleFactoredModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + StringInput si; return ( (si=si_m.first>>si_m.second->m1>>psD)!=NULL || + (si=si_m.first>>si_m.second->m2>>psD)!=NULL ) ? 
si : NULL; } +}; + +template +class TripleFactoredModel { + private: + M1 m1; M2 m2; M3 m3; + public: + const M1& getM1 ( ) const { return m1; } + const M2& getM2 ( ) const { return m2; } + const M3& getM3 ( ) const { return m3; } + M1& setM1 ( ) { return m1; } + M2& setM2 ( ) { return m2; } + M3& setM3 ( ) { return m3; } + void subsume ( TripleFactoredModel& fm ) { m1.subsume(fm.m1); m2.subsume(fm.m2); m3.subsume(fm.m3); } + void clear ( ) { m1.clear(); m2.clear(); m3.clear(); } + bool readFields ( Array& aps ) { return ( m1.readFields(aps) || m2.readFields(aps) || m3.readFields(aps) ); } + friend pair*> operator>> ( StringInput si, TripleFactoredModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + StringInput si; return ( (si=si_m.first>>si_m.second->m1>>psD)!=NULL || + (si=si_m.first>>si_m.second->m2>>psD)!=NULL || + (si=si_m.first>>si_m.second->m3>>psD)!=NULL ) ? si : NULL; } +}; + +template +class QuadrupleFactoredModel { + private: + M1 m1; M2 m2; M3 m3;M4 m4; + public: + const M1& getM1 ( ) const { return m1; } + const M2& getM2 ( ) const { return m2; } + const M3& getM3 ( ) const { return m3; } + const M4& getM4 ( ) const { return m4; } + M1& setM1 ( ) { return m1; } + M2& setM2 ( ) { return m2; } + M3& setM3 ( ) { return m3; } + M4& setM4 ( ) { return m4; } + void subsume ( QuadrupleFactoredModel& fm ) { m1.subsume(fm.m1); m2.subsume(fm.m2); m3.subsume(fm.m3); m4.subsum(fm.m4); } + void clear ( ) { m1.clear(); m2.clear(); m3.clear(); m4.clear(); } + bool readFields ( Array& aps ) { return ( m1.readFields(aps) || m2.readFields(aps) || m3.readFields(aps) || m4.readFields(aps) ); } + friend pair*> operator>> ( StringInput si, QuadrupleFactoredModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + StringInput si; return ( (si=si_m.first>>si_m.second->m1>>psD)!=NULL || + (si=si_m.first>>si_m.second->m2>>psD)!=NULL || + 
(si=si_m.first>>si_m.second->m3>>psD)!=NULL || + (si=si_m.first>>si_m.second->m4>>psD)!=NULL ) ? si : NULL; } +}; + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +/* +template +class ComplexSingleIteratedModeledRV { + public: + static const int NUM_ITERS = IMRV1::NUM_ITERS; + IMRV1 iter_first; + ComplexSingleIteratedModeledRV ( ) { } + ComplexSingleIteratedModeledRV ( const ComplexSingleIteratedModeledRV& imrv ) : iter_first(imrv.iter_first) { } + ComplexSingleIteratedModeledRV ( const IMRV1& imrv1 ) : iter_first(imrv1) { } + void write ( FILE* pf ) const { iter_first.write(pf); } + friend ostream& operator<< ( ostream& os, const ComplexSingleIteratedModeledRV& rv ) { return os< +class ComplexDoubleIteratedModeledRV { + public: + static const int NUM_ITERS = IMRV1::NUM_ITERS + IMRV2::NUM_ITERS; +// typedef DelimitedJoint2DRV VAR; + IMRV1 iter_first; + IMRV2 iter_second; + ComplexDoubleIteratedModeledRV ( ) { } + ComplexDoubleIteratedModeledRV ( const ComplexDoubleIteratedModeledRV& imrv ) : iter_first(imrv.iter_first), iter_second(imrv.iter_second) { } + ComplexDoubleIteratedModeledRV ( const IMRV1& imrv1, const IMRV2& imrv2 ) : iter_first(imrv1), iter_second(imrv2) { } +// operator VAR() { return VAR(iter_first,iter_second); } + void write ( FILE* pf ) const { iter_first.write(pf); fprintf(pf,","); iter_second.write(pf); } + friend ostream& operator<< ( ostream& os, const ComplexDoubleIteratedModeledRV& rv ) { return os< +class ComplexTripleIteratedModeledRV { + public: + static const int NUM_ITERS = IMRV1::NUM_ITERS + IMRV2::NUM_ITERS + IMRV3::NUM_ITERS; + IMRV1 iter_first; + IMRV2 iter_second; + IMRV3 iter_third; + ComplexTripleIteratedModeledRV ( ) { } + ComplexTripleIteratedModeledRV ( const ComplexTripleIteratedModeledRV& imrv ) : iter_first(imrv.iter_first), iter_second(imrv.iter_second), iter_third(imrv.iter_third) { } + 
ComplexTripleIteratedModeledRV ( const IMRV1& imrv1, const IMRV2& imrv2, const IMRV3& imrv3 ) : iter_first(imrv1), iter_second(imrv2), iter_third(imrv3) { } + void write ( FILE* pf ) const { iter_first.write(pf); fprintf(pf,","); iter_second.write(pf); fprintf(pf,","); iter_third.write(pf); } + friend ostream& operator<< ( ostream& os, const ComplexTripleIteratedModeledRV& rv ) + { return os< +class ComplexQuadrupleIteratedModeledRV { + public: + static const int NUM_ITERS = IMRV1::NUM_ITERS + IMRV2::NUM_ITERS + IMRV3::NUM_ITERS + IMRV4::NUM_ITERS; + IMRV1 iter_first; + IMRV2 iter_second; + IMRV3 iter_third; + IMRV4 iter_fourth; + ComplexQuadrupleIteratedModeledRV ( ) { } + ComplexQuadrupleIteratedModeledRV ( const ComplexQuadrupleIteratedModeledRV& imrv ) : iter_first(imrv.iter_first), iter_second(imrv.iter_second), + iter_third(imrv.iter_third), iter_fourth(imrv.iter_fourth) { } + ComplexQuadrupleIteratedModeledRV ( const IMRV1& imrv1, const IMRV2& imrv2, const IMRV3& imrv3, const IMRV4& imrv4 ) : iter_first(imrv1), iter_second(imrv2), + iter_third(imrv3), iter_fourth(imrv4) { } + void write ( FILE* pf ) const { iter_first.write(pf); fprintf(pf,","); iter_second.write(pf); fprintf(pf,","); iter_third.write(pf); fprintf(pf,","); iter_fourth.write(pf); } + friend ostream& operator<< ( ostream& os, const ComplexQuadrupleIteratedModeledRV& rv ) + { return os< +class ComplexArrayIteratedModeledRV { + public: + static const int NUM_ITERS = IMRV::NUM_ITERS * I; + DelimitedStaticSafeArray iter_array; + void write ( FILE* pf ) const { for(int i=0;i& rv ) { return os<. // +// // +// ModelBlocks developers designate this particular file as subject to // +// the "Moses" exception as provided by ModelBlocks developers in // +// the LICENSE file that accompanies this code. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _NL_DENOT__ +#define _NL_DENOT__ + +#include "nl-safeids.h" +#include "nl-prob.h" +#include "nl-const.h" +#include "nl-randvar.h" +#include "nl-cpt.h" +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// VecE +// +//////////////////////////////////////////////////////////////////////////////// + +template +class VecE : public StaticSafeArray { // Coindexation vector + public: + typedef I IntType; + typedef RC RCType; + static const int NUM_ENTS; + // Constructor / destructor methods... + VecE ( ) : StaticSafeArray(-1) { } + VecE ( const string& s, RC& rc ) : StaticSafeArray(-1) { read(s,rc); } + // Specification methods... + void unOffset ( const ReaderContext& rc ) { } + void pushOffsets ( const ReaderContext& rc ) { } + // Extraction methods... + //int getHashConst ( const int m ) const { int k=1; for(int i=0;i::get(i)); k%=m; } return k; } + size_t getHashKey ( ) const { size_t k=0; for(int i=0;i::get(i).toInt(); } return k; } + bool operator< (const VecE& e) const { return StaticSafeArray::operator<(e); } + bool operator==(const VecE& e) const { return StaticSafeArray::operator==(e); } + void readercontextPush ( const ReaderContext& rc ) const { } + void readercontextPop ( const ReaderContext& rc ) const { } + // Input / output methods... + void read ( char* ps, const ReaderContext& ) ; + void write ( FILE*, ReaderContext& ) const ; + void write ( FILE* pf ) const { ReaderContext rc; write(pf,rc); } + string getString() const { ReaderContext rc; return getString(rc); } + string getString( ReaderContext& ) const; +}; +template const int VecE::NUM_ENTS = N; + +//////////////////// +template +void VecE::read ( char* ps, const ReaderContext& rc ) { + /* + ////fprintf(stderr,"VecE::VecE in\n"); + int i; + string::size_type j; + // Chop into individual coinds strings... 
+ for ( i=0, j=0; s!="" && s!="," && j!=string::npos; i++, j=s.find_first_of(',',j), j=(-1==j)?j:j+1 ) + StaticSafeArray::set(i) = s.substr ( j, s.find_first_of(',',j)-j ); + ////fprintf(stderr,"VecE::VecE out\n"); + */ + char* psT; int i=0; + for ( char* psU=strtok_r(ps,",",&psT); + psU && i::set(i) = psU; +} + +//////////////////// +template +void VecE::write ( FILE* pf, ReaderContext& rc ) const { + for(int i=0; i::get(i) >= 0 ) { + if(i) fprintf(pf,","); StaticSafeArray::get(i).write(pf); + } +} + +template +string VecE::getString( ReaderContext& rc ) const { + string rString; + for(int i=0; i::get(i) >= 0 ) { + if(i) rString += ","; + rString += StaticSafeArray::get(i).getString(); + } + return rString; +} + +//////////////////////////////////////////////////////////////////////////////// +// +// VecVReaderContext +// +//////////////////////////////////////////////////////////////////////////////// + +class VecVReaderContext : public ReaderContext { + public: + map msi; + int offset; + VecVReaderContext ( ) { offset=0; } +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// VecV +// +//////////////////////////////////////////////////////////////////////////////// + +template +class VecV : public StaticSafeArray { // Coindexation vector + public: + typedef I IntType; + typedef RC RCType; + static const int NUM_ENTS; + static const int NUM_ENTS_DEP1; + static const int NUM_ENTS_DEP2; + // Constructor / destructor methods... + VecV ( ) : StaticSafeArray(-1) { } + VecV ( const string& s, VecVReaderContext& rc ) : StaticSafeArray(-1) { read(s,rc); } + // Specification methods... + void unOffset ( const RCType& rc ) { for(int i=0; i::get(i)!=-1) StaticSafeArray::set(i) += -rc.offset; } + void pushOffsets ( RCType& rc ) { for(typename map::iterator i=rc.msi.begin(); i!=rc.msi.end(); i++) i->second+=NUM_ENTS; } + // Extraction methods... 
+ //int getHashConst ( const int m ) const { int k=1; for(int i=0;i::get(i); k%=m; } return k; } + size_t getHashKey ( ) const { size_t k=0; for(int i=0;i::get(i).toInt(); } return k; } + bool operator< (const VecV& v) const { return StaticSafeArray::operator<(v); } + bool operator==(const VecV& v) const { return StaticSafeArray::operator==(v); } + void readercontextPush ( VecVReaderContext& rc ) const { rc.offset+=N; } + void readercontextPop ( VecVReaderContext& rc ) const { rc.offset-=N; } + // Input / output methods... + void read ( char*, VecVReaderContext& ) ; + void write ( FILE*, RC& ) const ; + void write ( FILE* pf ) const { RC rc; write(pf,rc); } + string getString( RC& ) const; + string getString() const { RC rc; return getString(rc); } +}; +template const int VecV::NUM_ENTS = N; +template const int VecV::NUM_ENTS_DEP1 = ND1; +template const int VecV::NUM_ENTS_DEP2 = ND2; + +//////////////////// +template +void VecV::read ( char* ps, VecVReaderContext& rc ) { + ////fprintf(stderr,"VecV::VecV in %d\n",rc.offset); + StaticSafeArray asV; + + // Chop into individual coinds strings... + char* psT; int i=0; + for ( char* psU=strtok_r(ps,",",&psT); + psU && i=0; i-- ) { + // Add to map if new coind... 
+ if (rc.msi.find(asV.get(i)) == rc.msi.end()) rc.msi[asV.get(i)]=i+rc.offset; + StaticSafeArray::set(i)=rc.msi[asV.get(i)]; + } + ////fprintf(stderr,"VecV::read out\n"); + //for(int i=0; i::get(i))); fprintf(stderr,"\n"); + //write(stderr); +} + +//////////////////// +template +void VecV::write ( FILE* pf, RC& rc ) const { + for(int i=0; i::get(i).toInt())); + if(StaticSafeArray::get(i) >= 0 ) + fprintf(pf,"%sv%02d",(i)?",":"",(rc.offset)+int(StaticSafeArray::get(i).toInt())); +} + +template +string VecV::getString (RC& rc) const { + string rString; + for(int i=0; i::get(i) >= 0 ) { + rString += (i)?",":""; + char* tmp = new char[4]; + // char tmp* = (char*)malloc(4*sizeof(char)); + sprintf(tmp, "v%02d\0",(rc.offset)+int(StaticSafeArray::get(i).toInt())); + rString += tmp; + delete tmp; + // free(tmp); + } + return rString; +} + +//////////////////////////////////////////////////////////////////////////////// +// +// VecV Compositors +// +//////////////////////////////////////////////////////////////////////////////// + +template +class JointVecV { //// : public StaticSafeArray { + private: + V1 v1; + V2 v2; + public: + typedef typename V1::IntType IntType; + typedef typename V1::RCType RCType; + static const int NUM_ENTS; + // Constructor / destructor methods... + JointVecV ( ) { } + JointVecV ( const V1& a1, const V2& a2 ) { + ////fprintf(stderr,"iJoin "); a1.V1::write(stderr); fprintf(stderr," "); a2.V2::write(stderr); fprintf(stderr,"\n"); + for (int i=0; i::write(stderr); fprintf(stderr,"\n"); + } + // Specification methods... + typename V1::IntType& set(int i) { assert(i>=0); assert(i::iterator i=rc.msi.begin(); i!=rc.msi.end(); i++) i->second+=NUM_ENTS; } + // Extraction methods... 
+ //int getHashConst ( const int m ) const { return((v1.getHashConst(m) * v2.getHashConst(m))%m); } + //int getHashKey ( const int m ) const { int k=((v1.getHashKey(m)*v2.getHashConst(m))+v2.getHashKey(m))%m; return k; } + size_t getHashKey ( ) const { size_t k=rotLeft(v1.getHashKey(),3); k^=v2.getHashKey(); return k; } + bool operator==(const JointVecV& vv) const { return (v1==vv.v1 && v2==vv.v2); } + bool operator<(const JointVecV& vv) const { return (v1=0); assert(i::getSub1().readercontextPush(rc);*/ fprintf(pf,";"); + v2.write(pf,rc); /*JointVecV::getSub1().readercontextPop(rc);*/ } + void write ( FILE* pf ) const { RCType rc; write(pf,rc); } + string getString( RCType& rc ) const { return v1.getString(rc) + ";" + v2.getString(rc); } + string getString() const { RCType rc; return getString(rc); } +}; +template +const int JointVecV::NUM_ENTS = V1::NUM_ENTS+V2::NUM_ENTS; + +//////////////////////////////////////////////////////////// + +template +class DelimitedJointVecV : public JointVecV { + public: + typedef typename V1::RCType RCType; + // Constructor / destructor methods... + DelimitedJointVecV ( ) : JointVecV() { } + DelimitedJointVecV ( const string& s, typename V1::RCType& rc ) : JointVecV() { read(s,rc); } + // Extraction methods... + bool operator==(const DelimitedJointVecV& vv) const { return JointVecV::operator==(vv); } + bool operator<(const DelimitedJointVecV& vv) const { return JointVecV::operator<(vv); } + void readercontextPush ( typename V1::RCType& rc ) const { JointVecV::getSub1().readercontextPush(rc); JointVecV::getSub2().readercontextPush(rc); } + void readercontextPop ( typename V1::RCType& rc ) const { JointVecV::getSub1().readercontextPop(rc); JointVecV::getSub2().readercontextPop(rc); } + // Input / output methods... 
+ void read ( char*, RCType& ) ; + void write ( FILE* pf, RCType& rc ) const { + fprintf(pf,"%s",SD1); JointVecV::getSub1().write(pf,rc); /*JointVecV::getSub1().readercontextPush(rc);*/ + fprintf(pf,"%s",SD2); JointVecV::getSub2().write(pf,rc); /*JointVecV::getSub1().readercontextPop(rc);*/ + fprintf(pf,"%s",SD3); + } + void write ( FILE* pf ) const { RCType rc; write(pf,rc); } + string getString(RCType& rc) const { + return SD1 + JointVecV::getSub1().getString(rc) + SD2 + JointVecV::getSub2().getString(rc) + SD3; + } + string getString() const { RCType rc; return getString(rc); } +}; + +//////////////////// +template +void DelimitedJointVecV::read ( char* s, RCType& rc ) { + ////fprintf(stderr,"DelimitedJointVecV::read chopping '%s' into '%s'...'%s'...'%s'\n",s.c_str(),SD1,SD2,SD3); + if(0!=strncmp(SD1,s,strlen(SD1))) fprintf(stderr,"ERR: '%s' doesn't begin with '%s'\n",s,SD1); + if(0!=strcmp(SD3,s+strlen(s)-strlen(SD3))) fprintf(stderr,"ERR: '%s' doesn't end with '%s'\n", s,SD3); + /* + if(0!=strncmp(SD1,s.c_str(),strlen(SD1))) fprintf(stderr,"ERR: '%s' doesn't begin with '%s'\n",s.c_str(),SD1); + if(0!=strcmp(SD3,s.c_str()+s.length()-strlen(SD3))) fprintf(stderr,"ERR: '%s' doesn't end with '%s'\n",s.c_str(),SD3); + string::size_type j = s.find(SD2); + if(string::npos==j) fprintf(stderr,"ERR: no '%s' found in %s\n",SD2,s.c_str()); + string s1 = s.substr ( strlen(SD1), j-strlen(SD1) ); + string s2 = s.substr ( j+strlen(SD2), int(s.length())-int(j)-strlen(SD2)-strlen(SD3) ); + */ + char* s1 = s+strlen(SD1); + char* s2 = strstr(s,SD2); if (!s2) fprintf(stderr,"WARNING: (VecV) no '%s' found in '%s' -- assuming second string empty\n",SD2,s); + s[strlen(s)-strlen(SD3)]='\0'; + if (s2) { *s2='\0'; s2+=strlen(SD2); } else { s2=s+strlen(s); } + //fprintf(stderr,"s1=%s, s2=%s\n",s1,s2); + ////fprintf(stderr,"DelimitedJointVecV::read gives `%s' and `%s'\n",s1.c_str(),s2.c_str()); + JointVecV::getSub1().readercontextPush(rc); JointVecV::setSub2().read ( s2, rc ); 
//V1::NUM_ENTS, msi ); + JointVecV::getSub1().readercontextPop(rc); JointVecV::setSub1().read ( s1, rc ); //0, msi ); +} + +//////////////////////////////////////////////////////////// + +char psXX[] = ""; // Null delimiter for joint RVs +template +class VecVV : public DelimitedJointVecV >,psXX,VecV >,psXX> {}; // Coindexation vector, for result of binary branch rule + +//////////////////////////////////////////////////////////////////////////////// + +template +class ComposedVecV : public JointVecV { + public: + ComposedVecV ( const V1& v1, const V2& v2 ) : JointVecV() { + ////fprintf(stderr,"iComp "); v1.write(stderr); fprintf(stderr," "); v2.write(stderr); fprintf(stderr,"\n"); + for (int i=0; i::set(i) = v1.get(i); + // If v1[i] inter-indexed... + else if ( v1.get(i)::set(i) = (v2.get(v1.get(i).toInt()-V1::NUM_ENTS)!=-1) ? v2.get(v1.get(i).toInt()-V1::NUM_ENTS)+V1::NUM_ENTS : -1; +//fprintf(stderr,"??????????? %d(%d,%d)%d,%d: ",i,V1::NUM_ENTS,V2::NUM_ENTS,int(v1.get(i)),int(v2.get(13))); JointVecV::write(stderr); fprintf(stderr,"\n"); +//} + else + JointVecV::set(i) = v1.get(i); + } + else { + // If not valid... + if ( v2.get(i-V1::NUM_ENTS)==-1 ) continue; + // If in second sub-part... + else JointVecV::set(i) = v2.get(i-V1::NUM_ENTS)+V1::NUM_ENTS; + } + } + ////fprintf(stderr,"oComp "); JointVecV::write(stderr); fprintf(stderr,"\n"); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +template +class CommutedVecV : public JointVecV { + public: + CommutedVecV ( const JointVecV& v ) : JointVecV() { + ////fprintf(stderr,"iComm "); v.write(stderr); fprintf(stderr,"\n"); + // Iterate backward through new coind vector... + for ( int i=V1::NUM_ENTS+V2::NUM_ENTS-1; i>=0; i-- ) { + // If in second sub part V2 (first in v)... + if ( i>=V1::NUM_ENTS ) { + // If not valid... + if ( v.get(i-V1::NUM_ENTS)==-1 ) continue; + // If v[i-w2] intra-indexed... 
+ else if ( v.get(i-V1::NUM_ENTS)::set(i) = v.get(i-V1::NUM_ENTS)+V1::NUM_ENTS; + // If v[i-w2] cross-indexed to other sub-part (V1)... + else if ( v.get(i-V1::NUM_ENTS)::set(v.get(i-V1::NUM_ENTS).toInt()-V2::NUM_ENTS) = i; + JointVecV::set(i) = get(v.get(i-V1::NUM_ENTS).toInt()-V2::NUM_ENTS); + } + // If v[i-w2] inter-indexed... + else JointVecV::set(i) = v.get(i-V1::NUM_ENTS); + } + // If in first sub part V1 (second in v), and not coindexed already... + else if (JointVecV::get(i)==-1) { + // If not valid... + if ( v.get(i+V2::NUM_ENTS)==-1 ) continue; + // If v[i+w1] intra-indexed... + else if ( v.get(i+V2::NUM_ENTS)::set(i) = v.get(i+V2::NUM_ENTS)-V2::NUM_ENTS; + else + JointVecV::set(i) = get(v.get(i+V2::NUM_ENTS).toInt()-V2::NUM_ENTS); + } + // If v[i+w1] inter-indexed... + else JointVecV::set(i) = v.get(i+V2::NUM_ENTS); + } + //fprintf(stderr,"??????????? %d: ",i); v.write(stderr); fprintf(stderr," -> "); JointVecV::write(stderr); fprintf(stderr,"\n"); + } + ////fprintf(stderr,"oComm "); JointVecV::write(stderr); fprintf(stderr,"\n"); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +template +class NewCommutedVecV : public JointVecV > { + public: + NewCommutedVecV ( const JointVecV >& v ) : JointVecV >() { + ////fprintf(stderr,"iNewComm "); v.write(stderr); fprintf(stderr,"\n"); + // Iterate backward through new coind vector... + for ( int i=V1::NUM_ENTS+V2::NUM_ENTS+V3::NUM_ENTS-1; i>=0; i-- ) { + // If in third sub part V3 (second in v)... + if ( i>=V1::NUM_ENTS+V2::NUM_ENTS ) { + // If not valid... + if ( v.get(i-V2::NUM_ENTS)==-1 ) continue; + // If v[i-w2] intra-indexed... + else if ( v.get(i-V2::NUM_ENTS) >::set(i) = v.get(i-V2::NUM_ENTS)+V2::NUM_ENTS; + // If v[i-w2] cross-indexed to other sub-part (V2, third in v)... 
+ else if ( v.get(i-V2::NUM_ENTS) >::set(v.get(i-V2::NUM_ENTS).toInt()-V3::NUM_ENTS) = i; + JointVecV >::set(i) = get(v.get(i-V2::NUM_ENTS).toInt()-V3::NUM_ENTS); + } + // If v[i-w2] inter-indexed... + else JointVecV >::set(i) = v.get(i-V2::NUM_ENTS); + } + // If in second sub part V2 (third in v), and not coindexed already... + else if ( i>=V1::NUM_ENTS && JointVecV >::get(i)==-1 ) { + // If not valid... + if ( v.get(i+V3::NUM_ENTS)==-1 ) continue; + // If v[i+w1] intra-indexed... + else if ( v.get(i+V3::NUM_ENTS) >::set(i) = v.get(i+V3::NUM_ENTS)-V3::NUM_ENTS; + else + JointVecV >::set(i) = get(v.get(i+V3::NUM_ENTS).toInt()-V3::NUM_ENTS); + // If v[i+w1] inter-indexed... + else JointVecV >::set(i) = v.get(i+V3::NUM_ENTS); + } + // If in first sub part V1 (also first in v)... + else if ( i=V1::NUM_ENTS+V3::NUM_ENTS+V2::NUM_ENTS ) + JointVecV >::set(i) = v.get(i); + // If v[i] inter-indexed to V3 (second in v)... + else if ( v.get(i) >::set(i) = v.get(i)+V2::NUM_ENTS; + // If v[i] inter-indexed to V2 (third in v) (note: may have gotten forwarded in commute)... + else JointVecV >::set(i) = get(v.get(i).toInt()-V3::NUM_ENTS); + } + //fprintf(stderr,"??????????? %d: ",i); v.write(stderr); fprintf(stderr," -> "); JointVecV >::write(stderr); fprintf(stderr,"\n"); + } + ////fprintf(stderr,"oNewComm\n"); //JointVecV::write(stderr); fprintf(stderr,"\n"); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +template +class AssociatedVecV : public V { + public: + AssociatedVecV ( const V1& v ) { + // Iterate through new coind vector... + for ( int i=0; i +class MarginalVecV : public V1 { + public: + MarginalVecV ( const JointVecV& v ) { + ////fprintf(stderr,"iMarg "); v.write(stderr); fprintf(stderr,"\n"); + // Iterate through new coind vector... + for ( int i=0; i. 
// +// // +// ModelBlocks developers designate this particular file as subject to // +// the "Moses" exception as provided by ModelBlocks developers in // +// the LICENSE file that accompanies this code. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _NL_DTREE__ +#define _NL_DTREE__ + +#include +#include +#include +#include +#include "nl-safeids.h" +//#include "nl-probmodel.h" +#include "nl-cpt.h" +#include "nl-randvar.h" +#include "nl-tree.h" +#include "nl-hash.h" +#include "nl-list.h" + +//#define UNIGR_FO .5 // IF NO EXAMPLES, USE APRIORI PROB OF FO + +//typedef Id A; //BitNum; +// //typedef Id B; + +using namespace std; + +//////////////////////////////////////////////////////////////////////////////// +// +// DTree Node +// +//////////////////////////////////////////////////////////////////////////////// + +template +class DecisNode { + public: + // Public types... + typedef Id A; + + private: + // Private types... + typedef typename X::ElementType B; + // Data members... + A aNontermDecis; // Nonterminal nodes have an attribute (e.g. convexity bit) on which to condition + map aprTermDistrib; // Terminal nodes have a distribution over Y values + + public: + // Constructor / destructor methods... + DecisNode ( ) : aNontermDecis(-1) { } + + // Specification methods... + A& setA ( ) { return aNontermDecis; } + P& setProb ( const Y y ) { return aprTermDistrib[y]; } + + // Extraction methods... + const A getA ( ) const { return aNontermDecis; } + const P getProb ( const Y y ) const { return ( (aprTermDistrib.empty()) ? P(1.0/Y::getDomain().getSize()) : + (aprTermDistrib.find(y)!=aprTermDistrib.end()) ? 
aprTermDistrib.find(y)->second : P() ); } +}; + + +//////////////////////////////////////////////////////////////////////////////// +// +// DTree Model +// +//////////////////////////////////////////////////////////////////////////////// + +template +//class DTree2DModel : public Generic2DModel, public Tree < typename X::ElementType, DecisNode > { +class DTree2DModel : public Tree < typename X::ElementType, DecisNode > { + private: + // Type members... + typedef typename X::ElementType B; + public: + // Downcasts (safe b/c no new data)... + DTree2DModel& setBranch(const B& b) { return static_cast&> ( Tree >::setBranch(b) ); } + const DTree2DModel& getBranch(const B& b) const { return static_cast&> ( Tree >::getBranch(b) ); } + // Extraction methods... + const P getProb ( const Y y, const X& x ) const { + const Tree >* ptr = this; + while ( !ptr->isTerm() ) /*{cerr<getA().toInt())<<",";*/ ptr = &ptr->getBranch ( x.get(ptr->getA().toInt()) ); /*}*/ + return ptr->getProb(y); + } + // Input / output methods... 
+ bool readFields ( Array& ) ; + void write ( FILE* pf, const char psPrefix[], char psPath[], int iEnd ) const { + if (Tree >::isTerm()) { + psPath[iEnd]='\0'; + Y y; + for ( bool b=y.setFirst(); b; b=y.setNext() ) + { fprintf(pf, "%s [%s] : ", psPrefix, psPath); fprintf(pf,"%s",y.getString().c_str()); fprintf(pf, " = %f\n", Tree >::getProb(y).toDouble()); } + ////psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] : 0 = %f\n", psPrefix, psPath, (double)Tree >::getProb("0") ); + ////psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] : 1 = %f\n", psPrefix, psPath, (double)Tree >::getProb("1") ); + } else { + psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] = %d\n", psPrefix, psPath, (int)Tree >::getA().toInt() ); + B b; + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) { + psPath[iEnd]=b.getString().c_str()[0]; psPath[iEnd+1]='\0'; getBranch(b).write(pf,psPrefix,psPath,iEnd+1); +// psPath[iEnd]='0'; psPath[iEnd+1]='\0'; getLeft().write ( pf, psPrefix, psPath, iEnd+1 ); +// psPath[iEnd]='1'; psPath[iEnd+1]='\0'; getRight().write ( pf, psPrefix, psPath, iEnd+1 ); + } + } + } + void writeFields ( FILE* pf, string sPref ) { + char psPath[1000] = ""; + write ( pf, (sPref+"").c_str(), psPath, 0 ); + } + //// + friend pair*> operator>> ( StringInput si, DTree2DModel& m ) { + return pair*>(si,&m); } + friend StringInput operator>> ( pair*> si_m, const char* psD ) { + if (StringInput(NULL)==si_m.first) return si_m.first; + Y y; String xs; StringInput si,si2; si=si_m.first; DTree2DModel* pm=si_m.second; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>xs>>" "; + while((si2=si>>" ")!=NULL)si=si2; + // Find appropriate node, creating nodes as necessary... + for(int i=1; isetBranch ( B(psTemp) ); + } + + if ( si!=NULL && si[0]==':' ) { + si=si>>": "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>y>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>"= "; + while((si2=si>>" ")!=NULL)si=si2; + // Specify attribute number (at nonterminal) or probability in distribution (at terminal)... + return (si!=NULL) ? 
si>>pm->setProb(y)>>psD : si; + } + else if ( si!=NULL && si[0]=='=' ) { + si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl; + while((si2=si>>" ")!=NULL)si=si2; + + //m.setA() = atoi(si.c_str()); + int aVar = 0; + si=si>>aVar>>psD; + pm->setA()=aVar; + ////cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl; + ////cerr<<" m.getA() is "<< m.getA().toInt() << endl; + return si; + //return (si!=NULL) ? si>>m.setA()>>psD : si; + } + else if ( si!=NULL ) cerr<<" ??? ["<& m = *si_m.second; + si=si_m.first; + sRt = si.c_str(); + if (sRt.find(':')!=string::npos) { + while((si2=si>>" [")!=NULL)si=si2; + si=si>>xs>>"] "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>": "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>y>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>"= "; + + // For DTree, must find the node labeled by X + //Tree >* ptr = m; + //assert(ptr); + // Find appropriate node, creating nodes as necessary... + for(int i=1; i>m.setProb(y)>>psD : si; + } else { + while((si2=si>>" [")!=NULL)si=si2; + si=si>>xs>>"] "; //cerr<<" in bracket "<<((si==NULL) ? "yes" : "no") << endl; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl; + + //m.setA() = atoi(si.c_str()); + int aVar = 0; + si=si>>aVar>>psD; + m.setA()=aVar; + //cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl; + //cerr<<" m.getA() is "<< m.getA().toInt() << endl; + return si; + //return (si!=NULL) ? si>>m.setA()>>psD : si; + } + */ + } + //// +}; + +//////////////////// +template +bool DTree2DModel::readFields ( Array& aps ) { + if ( /*aps[0]==sId &&*/ (3==aps.size() || 4==aps.size()) ) { + //fprintf(stderr,"%s,%d\n",aps[3],numFields); + assert ( '['==aps[1][0] && ']'==aps[1][strlen(aps[1])-1] ); + + // Start at root... + Tree >* ptr = this; + assert(ptr); + + // Find appropriate node, creating nodes as necessary... 
+ for(int i=1; isetBranch ( B(psTemp) ); +// assert ( '0'==aps[1][i] || '1'==aps[1][i] ); +// ptr = ( ('0'==aps[1][i]) ? &ptr->setLeft() : &ptr->setRight() ) ; +// assert(ptr); + } + + // Specify attribute number (at nonterminal) or probability in distribution (at terminal)... + if (3==aps.size()) ptr->setA() = atoi(aps[2]); + else if (4==aps.size()) ptr->setProb(aps[2]) = atof(aps[3]); + else assert(false); + + } else return false; + return true; +} + +//////////////////////////////////////////////////////////////////////////////// + +template +//class DTree3DModel : public Generic3DModel { +class DTree3DModel { + private: + // Type members... + typedef typename X2::ElementType B; + // Data members... + string sId; + SimpleHash > aqt; + public: + // Constructor / destructor methods... + DTree3DModel ( ) { } + DTree3DModel ( const string& s ) { sId = s; } + // Specification methods... + DTree2DModel& setTree ( const X1& x1 ) { return aqt.set(x1); } + // Extraction methods... + bool setFirst ( Y& y ) const { return y.setFirst(); } + bool setNext ( Y& y ) const { return y.setNext(); } + P getProb ( const Y y, const X1& x1, const X2& x2 ) const { return aqt.get(x1).getProb(y,x2); } + // Input / output methods... + bool readFields ( char*[], int ) ; + void writeFields ( FILE* pf, string sPref ) { + char psPath[1000] = ""; + X1 x1; + for ( bool b=x1.setFirst(); b; b=x1.setNext() ) + aqt.get(x1).write ( pf, (sPref + " " + x1.getString()).c_str(), psPath, 0 ); + } +}; + +//////////////////// +template +bool DTree3DModel::readFields ( char* aps[], int numFields ) { + if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) { + //fprintf(stderr,"%s,%d\n",aps[3],numFields); + assert ( '['==aps[2][0] && ']'==aps[2][strlen(aps[2])-1] ); + + // Start at root... + Tree >* ptr = &aqt.set(aps[1]); + assert(ptr); + + // Find appropriate node, creating nodes as necessary... 
+ for(int i=1; isetBranch ( B(psTemp) ); +// assert ( '0'==aps[2][i] || '1'==aps[2][i] ); +// ptr = ( ('0'==aps[2][i]) ? &ptr->setLeft() : &ptr->setRight() ) ; +// assert(ptr); + } + + // Specify bit (at nonterminal) or distribution (at terminal)... + if (4==numFields) ptr->setA() = atoi(aps[3]); + else if (5==numFields) ptr->setProb(aps[3]) = atof(aps[4]); + //// else if (5==numFields && 0==strcmp(aps[3],"0")) ptr->setProb() = 1.0 - atof(aps[4]); + //// else if (5==numFields && 0==strcmp(aps[3],"1")) ptr->setProb() = atof(aps[4]); + else assert(false); + + } else return false; + return true; +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// Trainable DTree Model +// +//////////////////////////////////////////////////////////////////////////////// + +template +class TrainableDTree2DModel : public DTree2DModel { + private: + // Type members... + typedef typename X::ElementType B; + // Static data members... + static List > lxyInitial; + public: + // Static member varaibles... + static bool OUTPUT_NOISY; + static bool OUTPUT_VERYNOISY; + // Downcasts (safe b/c no new data)... + TrainableDTree2DModel& setBranch(const B& b) { return static_cast&> ( Tree >::setBranch(b) ); } + const TrainableDTree2DModel& getBranch(const B& b) const { return static_cast&> ( Tree >::getBranch(b) ); } + // Specification methods... + void train ( List >&, const DecisNode&, const double ) ; + void train ( const double d ) { train(lxyInitial,DecisNode(),d); } + ////// Input / output methods... 
+ bool readData ( Array& aps ) { + if ( 3==aps.size() ) lxyInitial.add() = Joint2DRV ( X(aps[1]), Y(aps[2]) ); + else if ( 4==aps.size() ) { + for ( int i=atoi(aps[3]); i>0; i-- ) + lxyInitial.add() = Joint2DRV ( X(aps[1]), Y(aps[2]) ); + } + else return false; + return true; + } +}; +template List > TrainableDTree2DModel::lxyInitial; +template bool TrainableDTree2DModel::OUTPUT_NOISY = false; +template bool TrainableDTree2DModel::OUTPUT_VERYNOISY = false; + +//////////////////// +template +void TrainableDTree2DModel::train ( List >& lxy, const DecisNode& dnParent, const double DTREE_CHISQR_LIMIT ) { + + typedef typename DecisNode::A A; + + // Place to store counts... + //CPT3DModel aaaCounts; // hash was MUCH slower!! + SafeArray3D aaaCounts ( X::getSize(), B::getDomain().getSize(), Y::getDomain().getSize(), 0.0 ); + double dTot = lxy.getCard(); + CPT1DModel modelY; // ( "Y_prior" ); + + // For each datum in list... + ListedObject >* pxy; + for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) { + // For each attribute position... + for ( A a=0; afirst.get(a.toInt()), pxy->second )++; //(pxy->second==Y("1"))?1:0 )++; + modelY.setProb(pxy->second)++; + } + modelY.normalize(); + +// // If best attribute's prediction is not significant... +// else { + // Add ratio as leaf... + Y y; + for ( bool by=y.setFirst(); by; by=y.setNext() ) + DecisNode::setProb(y) = (dTot>100) ? modelY.getProb(y) : (double)dnParent.getProb(y); //1.0/Y::getDomain().getSize(); + //DecisNode::setProb("0") = (dXX>0) ? dX0/dXX : UNIGR_FO; + //DecisNode::setProb("1") = (dXX>0) ? dX1/dXX : UNIGR_FO; + ////DTree3DModel::setLeafModel().setProb(Y("0"),x1,node) = dX0/dXX; + ////DTree3DModel::setLeafModel().setProb(Y("1"),x1,node) = dX1/dXX; +// } + + double chisqr = 0.0; + A aBest=0; + + // Bail if will never be significant... + if ( !lxy.isEmpty() && lxy.getCard()>1000 ) { + + // For each attribute position... 
+ double entBest=0.0; + for ( A a=0; a modelY_giv_B; // ( "Y_giv_B" ); + CPT1DModel modelB; // ( "B_prior" ); + B b; + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) { + Y y; + for ( bool by=y.setFirst(); by; by=y.setNext() ) { + modelY_giv_B.setProb(y,b) = aaaCounts.get(a,b,y); + modelB.setProb(b) += aaaCounts.get(a,b,y); + } + } + modelY_giv_B.normalize(); + modelB.normalize(); + + // Calc entropy... + double ent = 0.0; + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) { + Y y; + for ( bool by=y.setFirst(); by; by=y.setNext() ) + ent -= (0.0==modelY_giv_B.getProb(y,b)) ? 0.0 : ( modelB.getProb(b) * modelY_giv_B.getProb(y,b) * log(modelY_giv_B.getProb(y,b)) ); + } + + // Record minimum entropy division... + if ( a==0 || ent0 ) { + // Local model for each attrib (bit num)... + CPT1DModel modelB_giv_Abest; // ( "B_giv_Abest" ); + B b; + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) { + Y y; + for ( bool by=y.setFirst(); by; by=y.setNext() ) { + modelB_giv_Abest.setProb(b) += aaaCounts.get(aBest,b,y); + } + } + modelB_giv_Abest.normalize(); + + // Calc chi sqr... + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) { + Y y; + for ( bool by=y.setFirst(); by; by=y.setNext() ) { + if ( modelB_giv_Abest.getProb(b)>0 && modelY.getProb(y)>0 && dTot>0 ) { + double exp = modelB_giv_Abest.getProb(b) * modelY.getProb(y) * dTot; + chisqr += pow ( aaaCounts.get(aBest,b,y)-exp, 2 ) / exp; + } + } + } + } + + if(OUTPUT_NOISY) + fprintf(stderr," chisqr=%g\n",chisqr); + } + + // If best bit prediction is significant... + if ( chisqr > DTREE_CHISQR_LIMIT ) { + //// // Add node to model fields... + //// DTree3DModel::setNodeMap()[Joint2DRV(x1,node)] = aBest; + // Split list into each 0/1 child of this node... + SimpleHash > > alxy; + // For each datum in list... + while ( !lxy.isEmpty() ) { + Joint2DRV* pxy = lxy.getFirst(); + alxy[pxy->first.get(aBest.toInt())].add() = *pxy; + lxy.pop(); + } + // Recursively call train at each child... 
+ DecisNode::setA()=aBest; + B b; + for ( bool bb=b.setFirst(); bb; bb=b.setNext() ) + setBranch(b).train ( alxy[b], *this, DTREE_CHISQR_LIMIT ); + // setLeft().train ( alxy[0], DTREE_CHISQR_LIMIT ); ////node*2LL); + // setRight().train ( alxy[1], DTREE_CHISQR_LIMIT ); ////node*2LL+1LL); + } +} + + +//////////////////////////////////////////////////////////////////////////////// + +template +class TrainableDTree3DModel : public DTree3DModel { + + private: + + map > > mqlxy; + + public: + + // Static member varaibles... + static bool OUTPUT_NOISY; + + ////// Constructor... + TrainableDTree3DModel(const char* ps) : DTree3DModel(ps) { } + + ////// setTree downcast... + TrainableDTree2DModel& setTree(const X1& x1) { return static_cast&>(DTree3DModel::setTree(x1)); } + + ////// Add training data to per-subphone lists... + bool readData ( Array& aps ) { + if ( 4==aps.size() ) { + mqlxy[X1(aps[1])].add() = Joint2DRV ( X2(aps[2]), Y(aps[3]) ); + ////mqlxy[X1(aps[1])].getLast()->write(stderr); fprintf(stderr,"\n"); + } + else return false; + return true; + } + + ////// Train each subphone... + void train ( const double DTREE_CHISQR_LIMIT ) { + int ctr = 0; + // For each subphone... + X1 x1; for ( bool b=x1.setFirst(); b; b=x1.setNext() ) { + if(OUTPUT_NOISY) + fprintf(stderr,"***** x1:%s (number %d) *****\n",x1.getString().c_str(),ctr++); + setTree(x1).train ( mqlxy[x1], DTREE_CHISQR_LIMIT ); + } + } +}; + +template bool TrainableDTree3DModel::OUTPUT_NOISY = false; + + +#endif // _NL_DTREE__ diff --git a/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmm.h b/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmm.h new file mode 100644 index 0000000000000000000000000000000000000000..c4414c4b7b3b48328b3d6d5a6a84b7e85444ec56 --- /dev/null +++ b/mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmm.h @@ -0,0 +1,1007 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. 
// +// // +// ModelBlocks is free software: you can redistribute it and/or modify // +// it under the terms of the GNU General Public License as published by // +// the Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. // +// // +// ModelBlocks is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // +// GNU General Public License for more details. // +// // +// You should have received a copy of the GNU General Public License // +// along with ModelBlocks. If not, see . // +// // +// ModelBlocks developers designate this particular file as subject to // +// the "Moses" exception as provided by ModelBlocks developers in // +// the LICENSE file that accompanies this code. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _NL_HMM_ +#define _NL_HMM_ + +#include +#include +#include +#include +//#include +//#include +//#include +#include "nl-prob.h" +#include "nl-safeids.h" +#include "nl-beam.h" + +typedef int Frame; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// NullBackDat - default empty back-pointer data; can replace with word or sem relation +// +//////////////////////////////////////////////////////////////////////////////// + +template +class NullBackDat { + static const string sDummy; + public: + NullBackDat () {} + NullBackDat (const MY& my) {} + void write (FILE*) const {} + string getString() const { return sDummy; } +}; +template +const string NullBackDat::sDummy ( "" ); + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// Index - pointer to source in previous beam heap +// 
+//////////////////////////////////////////////////////////////////////////////// + +class Index : public Id { + public: + Index ( ) { } + Index (int i) {set(i);} + Index& operator++ ( ) {set(toInt()+1); return *this;} +}; + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// TrellNode - node in viterbi trellis +// +//////////////////////////////////////////////////////////////////////////////// + +template +class TrellNode { + private: + + // Data members... + Index indSource; + B backptrData; + S sId; + LogProb lgprMax; + + public: + + // Constructor / destructor methods... + TrellNode ( ) { } + TrellNode ( const Index& indS, const S& sI, const B& bDat, LogProb lgpr) + { indSource=indS; sId=sI; lgprMax=lgpr; backptrData=bDat; /* fo = -1; */ } + + // Specification methods... + const Index& setSource ( ) const { return indSource; } + const B& setBackData( ) const { return backptrData; } + const S& setId ( ) const { return sId; } + LogProb& setScore ( ) { return lgprMax; } + + // Extraction methods... + bool operator== ( const TrellNode& tnsb ) const { return(sId==tnsb.sId); } +// size_t getHashKey ( ) const { return sId.getHashKey(); } + const Index& getSource ( ) const { return indSource; } + const B& getBackData( ) const { return backptrData; } + const S& getId ( ) const { return sId; } + LogProb getLogProb ( ) const { return lgprMax; } + LogProb getScore ( ) const { return lgprMax; } +}; + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// +// HMM +// +//////////////////////////////////////////////////////////////////////////////// + +template > +class HMM { + private: + typedef std::pair IB; + // Data members... 
+ const MY& my; + const MX& mx; + SafeArray2D,Id,TrellNode > aatnTrellis; + Frame frameLast; + int iNextNode; + public: + // Static member varaibles... + static bool OUTPUT_QUIET; + static bool OUTPUT_NOISY; + static bool OUTPUT_VERYNOISY; + static int BEAM_WIDTH; + // Constructor / destructor methods... + HMM ( const MY& my1, const MX& mx1 ) : my(my1), mx(mx1) { } + // Specification methods... + void init ( int, int, const S& ) ; + void init ( int, int, SafeArray1D,pair >* ); + void updateRanked ( const typename MX::RandVarType&, bool ) ; + void updateSerial ( const typename MX::RandVarType& ) ; + void updatePara ( const typename MX::RandVarType& ) ; + bool unknown ( const typename MX::RandVarType& ) ; + void each ( const typename MX::RandVarType&, Beam&, SafeArray1D,std::pair,LogProb> >& ) ; + // Extraction methods... + const TrellNode& getTrellNode ( int i ) const { return aatnTrellis.get(frameLast,i); } + int getBeamUsed ( int ) const ; + // Input / output methods... + void writeMLS ( FILE* ) const ; + void writeMLS ( FILE*, const S& ) const ; + void debugPrint() const; + double getCurrSum(int) const; + //void writeCurr ( FILE*, int ) const ; + void writeCurr ( ostream&, int ) const ; + void writeCurrSum ( FILE*, int ) const ; + void gatherElementsInBeam( SafeArray1D,pair >* result, int f ) const; + void writeCurrEntropy ( FILE*, int ) const; + //void writeCurrDepths ( FILE*, int ) const; + void writeFoll ( FILE*, int, int, const typename MX::RandVarType& ) const ; + void writeFollRanked ( FILE*, int, int, const typename MX::RandVarType&, bool ) const ; + std::list getMLS() const; + std::list > getMLSnodes() const; + std::list getMLS(const S&) const; + std::list > getMLSnodes(const S&) const; +}; +template bool HMM::OUTPUT_QUIET = false; +template bool HMM::OUTPUT_NOISY = false; +template bool HMM::OUTPUT_VERYNOISY = false; +template int HMM::BEAM_WIDTH = 1; + +//////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////// + +template +void HMM::init ( int numFr, int numS, const S& s ) { + + // Alloc trellis... + BEAM_WIDTH = numS; + aatnTrellis.init(numFr,BEAM_WIDTH); + + frameLast=0; + + // Set initial element at first time slice... + aatnTrellis.set(frameLast,0) = TrellNode ( Index(0), s, B(), 0 ) ; +} + +template + void HMM::init ( int numFr, int beamSize, SafeArray1D,pair >* existingBeam ) { + + // Alloc trellis... + // int numToCopy = existingBeam->getSize(); + BEAM_WIDTH = beamSize; + aatnTrellis.init(numFr,BEAM_WIDTH); + + frameLast=0; + + // Set initial beam elements at first time slice... + for ( int i=0, n=existingBeam->getSize(); i ( Index(0), existingBeam->get(i).first, B(), existingBeam->get(i).second ) ; + } + +} + +template + void HMM::debugPrint() const{ + + for (int frame=0, numFrames=aatnTrellis.getxSize(); frame 0) { + cerr << "\t" << "aatnTrellis.get(frame=" << frame << ",beamIndex=" << beamIndex << ") is\t" << aatnTrellis.get(frame,beamIndex).getId() << "\tprob=" << aatnTrellis.get(frame,beamIndex).getLogProb().toDouble() << endl; + } + + } + + } + +} + +//////////////////////////////////////////////////////////////////////////////// + +template +bool outRank ( const quad >& a1, + const quad >& a2 ) { return (a1.third>a2.third); } + +template + bool HMM::unknown( const typename MX::RandVarType& x ) { + return mx.unknown(x); +} + + +template +void HMM::updateRanked ( const typename MX::RandVarType& x, bool b1 ) { + // Increment frame counter... + frameLast++; + + // Init beam for new frame... 
+ Beam btn(BEAM_WIDTH); + SafeArray1D,std::pair,LogProb> > atnSorted (BEAM_WIDTH); + + Heap < quad >, outRank > ashpiQueue; + typedef quad > SHPI; + SHPI shpi, shpiTop; + int aCtr; + + ashpiQueue.clear(); + //shpi.first = -1; + //shpi.second = HModel::IterVal(); + //shpi.third = 1.0; + shpi.first = 0; + shpi.third = aatnTrellis.get(frameLast-1,shpi.first).getScore(); + shpi.third *= my.setIterProb ( shpi.second, aatnTrellis.get(frameLast-1,shpi.first).getId(), x, b1, aCtr=-1 ); + //S s; my.setTrellDat(s,shpi.second); + shpi.fourth = -1; + ////cerr<<"????? "<0; iTrg++ ) { + // Iterate A* (best-first) search until a complete path is at the top of the queue... + while ( ashpiQueue.getSize() > 0 && ashpiQueue.getTop().fourth < MY::IterVal::NUM_ITERS ) { + // Remove top... + shpiTop = ashpiQueue.dequeueTop(); + // Fork off (try to advance each elementary variable a)... + for ( int a=shpiTop.fourth.toInt(); a<=MY::IterVal::NUM_ITERS; a++ ) { + // Copy top into new queue element... + shpi = shpiTop; + // At variable position -1, advance beam element for transition source... + if ( a == -1 ) shpi.first++; + // Incorporate prob from transition source... + shpi.third = aatnTrellis.get(frameLast-1,shpi.first).getScore(); + if ( shpi.third > LogProb() ) { + // Try to advance variable at position a and return probability (subsequent variables set to first, probability ignored)... + shpi.third *= my.setIterProb ( shpi.second, aatnTrellis.get(frameLast-1,shpi.first).getId(), x, b1, aCtr=a ); + // At end of variables, incorporate observation probability... + if ( a == MY::IterVal::NUM_ITERS && shpi.fourth != MY::IterVal::NUM_ITERS ) + { S s; my.setTrellDat(s,shpi.second); shpi.third *= mx.getProb(x,s); } + // Record variable position at which this element was forked off... 
+ shpi.fourth = a; + //cerr<<" from partial: "< LogProb() ) { + ////if ( frameLast == 4 ) cerr<<" from partial: "< 0 ) { + S s; my.setTrellDat(s,ashpiQueue.getTop().second); + bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,my.setBackDat(ashpiQueue.getTop().second)), ashpiQueue.getTop().third ); + ////cerr<,LogProb>* tn1 = &atnSorted.get(i); + aatnTrellis.set(frameLast,i)=TrellNode(tn1->first.second.first, + tn1->first.first, + tn1->first.second.second, + tn1->second); + } + + my.update(); +} + + +//////////////////////////////////////////////////////////////////////////////// + +template +void HMM::updateSerial ( const typename MX::RandVarType& x ) { + // Increment frame counter... + frameLast++; + + // Init beam for new frame... + Beam btn(BEAM_WIDTH); + SafeArray1D,std::pair,LogProb> > atnSorted (BEAM_WIDTH); + +// // Copy beam to trellis... +// for ( int i=0; i