Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- mosesdecoder/contrib/m4m/examples/giza-vs-fast.m4m +99 -0
- mosesdecoder/contrib/m4m/modules/obsolete/Makefile +64 -0
- mosesdecoder/contrib/m4m/modules/obsolete/baseline-system.m4m +48 -0
- mosesdecoder/contrib/m4m/modules/obsolete/directory-structure.m4m +7 -0
- mosesdecoder/contrib/m4m/modules/obsolete/model-filtering.m4m +37 -0
- mosesdecoder/contrib/m4m/modules/obsolete/phrase-table.make.scratch +124 -0
- mosesdecoder/contrib/m4m/modules/obsolete/reporting.m4m +95 -0
- mosesdecoder/contrib/m4m/modules/obsolete/run-moses.m4m +37 -0
- mosesdecoder/contrib/m4m/modules/obsolete/setup-experiments.m4m +121 -0
- mosesdecoder/contrib/m4m/modules/obsolete/skip-steps.mak +19 -0
- mosesdecoder/contrib/m4m/modules/obsolete/system.m4m +38 -0
- mosesdecoder/contrib/m4m/modules/obsolete/template.m4m +66 -0
- mosesdecoder/contrib/m4m/modules/obsolete/tune.m4m +45 -0
- mosesdecoder/contrib/m4m/scripts/fast-align2bal.py +31 -0
- mosesdecoder/contrib/m4m/scripts/giza.txt2snt.sh +41 -0
- mosesdecoder/contrib/m4m/scripts/moses.extract-phrases.sh +63 -0
- mosesdecoder/contrib/m4m/scripts/moses.make-lex.py +86 -0
- mosesdecoder/contrib/m4m/scripts/moses.phrase-extract.sh +110 -0
- mosesdecoder/contrib/m4m/scripts/moses.score-phrases.sh +41 -0
- mosesdecoder/contrib/m4m/scripts/moses.transfer-weights.py +61 -0
- mosesdecoder/contrib/m4m/util/Jamfile +12 -0
- mosesdecoder/contrib/memscore/Makefile.in +581 -0
- mosesdecoder/contrib/memscore/configure.ac +84 -0
- mosesdecoder/contrib/memscore/lexdecom.h +41 -0
- mosesdecoder/contrib/memscore/memscore.cpp +85 -0
- mosesdecoder/contrib/memscore/memscore.h +57 -0
- mosesdecoder/contrib/memscore/missing +360 -0
- mosesdecoder/contrib/memscore/phraselm.h +45 -0
- mosesdecoder/contrib/memscore/phrasetable.cpp +348 -0
- mosesdecoder/contrib/memscore/scorer.h +71 -0
- mosesdecoder/contrib/memscore/timestamp.h +29 -0
- mosesdecoder/contrib/mira/Main.cpp +1849 -0
- mosesdecoder/contrib/mira/Perceptron.cpp +53 -0
- mosesdecoder/contrib/mira/mira.xcodeproj/project.pbxproj +401 -0
- mosesdecoder/contrib/moses-speedtest/README.md +146 -0
- mosesdecoder/contrib/moses-speedtest/check_for_regression.py +63 -0
- mosesdecoder/contrib/moses-speedtest/cronjob +7 -0
- mosesdecoder/contrib/moses-speedtest/runtests.py +439 -0
- mosesdecoder/contrib/moses-speedtest/sys_drop_caches.py +22 -0
- mosesdecoder/contrib/moses-speedtest/test_config +3 -0
- mosesdecoder/contrib/moses-speedtest/testsuite_config +5 -0
- mosesdecoder/contrib/picaro/README +62 -0
- mosesdecoder/contrib/picaro/es/README +4 -0
- mosesdecoder/contrib/picaro/es/sample.aln +1 -0
- mosesdecoder/contrib/picaro/es/sample.e +1 -0
- mosesdecoder/contrib/picaro/es/sample.f +1 -0
- mosesdecoder/contrib/picaro/picaro.py +250 -0
- mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.idx +0 -0
- mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.idx +0 -0
- mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srctree.wa +0 -0
mosesdecoder/contrib/m4m/examples/giza-vs-fast.m4m
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# some variables need to be set before m4m modules are included
|
| 4 |
+
.SECONDARY:
|
| 5 |
+
|
| 6 |
+
MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder
|
| 7 |
+
MGIZA_ROOT = ${HOME}/tools/mgiza
|
| 8 |
+
fast_align = ${HOME}/bin/fast_align
|
| 9 |
+
|
| 10 |
+
# L1: source language; L2: target language
|
| 11 |
+
L1 = de
|
| 12 |
+
L2 = en
|
| 13 |
+
WDIR = $(CURDIR)
|
| 14 |
+
|
| 15 |
+
include ${MOSES_ROOT}/contrib/m4m/modules/m4m.m4m
|
| 16 |
+
|
| 17 |
+
# both systems use the same language model
|
| 18 |
+
L2raw := $(wildcard ${WDIR}/crp/trn/*/raw/*.${L2}.gz)
|
| 19 |
+
L2data := $(subst /raw/,/cased/,${L2raw})
|
| 20 |
+
lm.order = 5
|
| 21 |
+
lm.factor = 0
|
| 22 |
+
lm.lazy = 1
|
| 23 |
+
lm.file = ${WDIR}/lm/${L2}.5-grams.kenlm
|
| 24 |
+
${lm.file}: | $(L2data)
|
| 25 |
+
$(eval $(call add_kenlm,${lm.file},${lm.order},${lm.factor},${lm.lazy}))
|
| 26 |
+
.INTERMEDIATE: ${L2data}
|
| 27 |
+
|
| 28 |
+
dmodels = wbe-mslr-bidirectional-fe-allff
|
| 29 |
+
mysystem = systems/${word-alignment}-aligned
|
| 30 |
+
myptable = model/tm/${aligner}.${L1}-${L2}
|
| 31 |
+
mydtable = model/dm/${aligner}.${L1}-${L2}
|
| 32 |
+
|
| 33 |
+
wa ?= $(error wa not specified on command line)
|
| 34 |
+
SYSTEMS :=
|
| 35 |
+
aligner :=
|
| 36 |
+
$(foreach a,${wa},\
|
| 37 |
+
$(eval aligner:=${a});\
|
| 38 |
+
$(eval $(clear-ptables));\
|
| 39 |
+
$(eval $(clear-dtables));\
|
| 40 |
+
$(eval SYSTEMS+=systems/${a}-aligned);\
|
| 41 |
+
$(eval $(call add_binary_phrase_table,0,0,4,$${myptable}));\
|
| 42 |
+
$(eval $(call add_binary_reordering_table,0,0,8,\
|
| 43 |
+
${dmodels},$${mydtable},$${myptable}));\
|
| 44 |
+
$(eval $(call create_moses_ini,$${mysystem})))
|
| 45 |
+
|
| 46 |
+
aln: $(foreach a,${wa},${WDIR}/crp/trn/aln/$a/${L1}-${L2}.symal.gz)
|
| 47 |
+
info:
|
| 48 |
+
dtable: ${DTABLES}
|
| 49 |
+
ptable: ${PTABLES}
|
| 50 |
+
system: $(addsuffix /moses.ini.0,${SYSTEMS})
|
| 51 |
+
eval: ${EVALUATIONS}
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
ifdef tune.runs
|
| 55 |
+
|
| 56 |
+
TUNED_SYSTEMS :=
|
| 57 |
+
EVALUATIONS :=
|
| 58 |
+
$(eval $(tune_all_systems))
|
| 59 |
+
$(eval $(bleu_score_all_systems))
|
| 60 |
+
tune: ${TUNED_SYSTEMS}
|
| 61 |
+
echo TUNED ${TUNED_SYSTEMS}
|
| 62 |
+
all: ${EVALUATIONS}
|
| 63 |
+
|
| 64 |
+
else
|
| 65 |
+
|
| 66 |
+
tune: all
|
| 67 |
+
|
| 68 |
+
# The recursive calls below make sure that tuning runs happen sequentially
|
| 69 |
+
# (moses runs multi-threaded anyway). The reason is that we may want to have
|
| 70 |
+
# first results as soon as possible.
|
| 71 |
+
tune.runs := 1 1
|
| 72 |
+
$(info TUNE RUNS ${tune.runs})
|
| 73 |
+
all:
|
| 74 |
+
$(foreach n,$(shell seq ${tune.runs}),\
|
| 75 |
+
${MAKE} -f $(word 1, ${MAKEFILE_LIST}) \
|
| 76 |
+
tune.runs="$n $n" ${MAKECMDGOALS} -${MAKEFLAGS})
|
| 77 |
+
|
| 78 |
+
endif
|
| 79 |
+
|
| 80 |
+
.PHONY: $(addprefix reset-,lm tm dm all aln tune eval systems)
|
| 81 |
+
reset-aln: reset-mm
|
| 82 |
+
-rm -rf $(foreach a,${wa},crp/trn/aln/${a})
|
| 83 |
+
reset-mm: reset-dm reset-tm
|
| 84 |
+
-rm -rf $(foreach a,${wa},crp/trn/mm/${a})
|
| 85 |
+
reset-dm: reset-systems
|
| 86 |
+
-rm -rf $(foreach a,${wa},model/dm/${a}.*)
|
| 87 |
+
reset-tm: reset-systems
|
| 88 |
+
-rm -rf $(foreach a,${wa},model/tm/${a}.*)
|
| 89 |
+
reset-systems:
|
| 90 |
+
-rm -rf ${SYSTEMS}
|
| 91 |
+
reset-tune:
|
| 92 |
+
-rm -rf $(foreach s,${SYSTEMS}/$s/tune)
|
| 93 |
+
reset-eval:
|
| 94 |
+
-rm -rf $(foreach s,${SYSTEMS},$s/eval)
|
| 95 |
+
reset-lm:
|
| 96 |
+
-rm -rf lm
|
| 97 |
+
reset-all: reset-lm reset-aln
|
| 98 |
+
-rm -rf $(wildcard crp/trn/*/[ct]* crp/dev/[ct]* crp/tst/[ct]*)
|
| 99 |
+
-rm -rf auxiliary
|
mosesdecoder/contrib/m4m/modules/obsolete/Makefile
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
# Mandatory at the beginning of the file, before m4m inclusions
|
| 3 |
+
|
| 4 |
+
# L1,L2: tags that identify translation source (L1)
|
| 5 |
+
# and translation target (L2) language
|
| 6 |
+
L1 ?= de
|
| 7 |
+
L2 ?= en
|
| 8 |
+
|
| 9 |
+
LL = $(word 1, $(sort ${L1} ${L2}))-$(word 2, $(sort ${L1} ${L2}))
|
| 10 |
+
# a name for this experiment
|
| 11 |
+
experiment = dynsa-vs-std-phrase-table
|
| 12 |
+
|
| 13 |
+
# the working directry
|
| 14 |
+
WDIR = $(CURDIR)
|
| 15 |
+
MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder
|
| 16 |
+
|
| 17 |
+
# include m4m boilerplate
|
| 18 |
+
include ${MOSES_ROOT}/contrib/m4m/modules/m4m.m4m
|
| 19 |
+
|
| 20 |
+
$(info M4MDIR=${m4mdir})
|
| 21 |
+
|
| 22 |
+
#include ${m4mdir}/baseline-system.make
|
| 23 |
+
#include ${m4mdir}dynsa-system.make
|
| 24 |
+
#$(info ${MY_EXPERIMENT})
|
| 25 |
+
|
| 26 |
+
tune.sets = $(subst /raw/,/cased/,$(wildcard crp/dev/raw/*.${L1}.gz))
|
| 27 |
+
|
| 28 |
+
all:
|
| 29 |
+
.PHONY: all
|
| 30 |
+
|
| 31 |
+
ifdef tune.runs
|
| 32 |
+
$(foreach tuneset, $(word 1,${tune.sets:.${L1}.gz=}),\
|
| 33 |
+
$(foreach run,$(shell seq ${tune.runs}),\
|
| 34 |
+
$(eval $(call tune_system,baseline/moses.ini.0,\
|
| 35 |
+
baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 36 |
+
${tuneset}.${L1},${tuneset}.${L2},0));\
|
| 37 |
+
$(if ,$(info $(call tune_system,baseline/moses.ini.0,\
|
| 38 |
+
baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 39 |
+
${tuneset}.${L1},${tuneset}.${L2},0));)\
|
| 40 |
+
$(eval $(call copy_weights,dynsa/moses.ini.0,\
|
| 41 |
+
baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 42 |
+
dynsa/tuned/$(notdir ${tuneset})/${run}/moses.ini));\
|
| 43 |
+
$(if ,$(info $(call copy_weights,dynsa/moses.ini.0,\
|
| 44 |
+
baseline/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 45 |
+
dynsa/tuned/$(notdir ${tuneset})/${run}/moses.ini));)\
|
| 46 |
+
$(foreach evalset,$(word 2,${tune.sets:.${L1}.gz=}),\
|
| 47 |
+
$(foreach system,baseline dynsa,\
|
| 48 |
+
$(eval evaltarget:=${system}/eval/$(notdir ${tuneset})/${run}/$(notdir ${evalset}));\
|
| 49 |
+
$(eval $(call bleu_eval,${evaltarget},\
|
| 50 |
+
${system}/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 51 |
+
${evalset}.${L1},${moses.inputtype.plaintext},${evalset}.${L2}));\
|
| 52 |
+
$(if ,$(info $(call bleu_eval,${evaltarget},\
|
| 53 |
+
${system}/tuned/$(notdir ${tuneset})/${run}/moses.ini,\
|
| 54 |
+
${evalset}.${L1},${moses.inputtype.plaintext},${evalset}.${L2}));)\
|
| 55 |
+
));\
|
| 56 |
+
))
|
| 57 |
+
|
| 58 |
+
all: ${EVALUATIONS}
|
| 59 |
+
echo EVALS ${EVALUATIONS}
|
| 60 |
+
else
|
| 61 |
+
all:
|
| 62 |
+
$(foreach n,$(shell seq 1 1),${MAKE} tune.runs="$n $n";)
|
| 63 |
+
endif
|
| 64 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/baseline-system.m4m
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# This module defines a simple phrase-based baseline system
|
| 4 |
+
# - a single corpus
|
| 5 |
+
# - no factors
|
| 6 |
+
# - single ttable
|
| 7 |
+
# - single distortion model
|
| 8 |
+
|
| 9 |
+
# chose a name for the system
|
| 10 |
+
# ${system}/moses.ini.0 then defines the system
|
| 11 |
+
system = baseline
|
| 12 |
+
SYSTEMS += ${system}
|
| 13 |
+
.PHONY: ${system}
|
| 14 |
+
${system}: ${system}/moses.ini.0
|
| 15 |
+
|
| 16 |
+
#################################################################################
|
| 17 |
+
#
|
| 18 |
+
# Create phrase table(s) and distortion model(s) that you want to use in this
|
| 19 |
+
# system. If you already have binary or text version of all tables, you don't
|
| 20 |
+
# need to specify pll.{txt1,txt2,aln}.
|
| 21 |
+
pll.txt1 = ${WDIR}/crp/trn/aln/fast/${L1}.txt.gz
|
| 22 |
+
pll.txt2 = ${WDIR}/crp/trn/aln/fast/${L2}.txt.gz
|
| 23 |
+
pll.aln = ${WDIR}/crp/trn/aln/fast/${L1}-${L2}.symal.gz
|
| 24 |
+
ptable = ${WDIR}/model/tm/ptable.${L1}-${L2}
|
| 25 |
+
dtable = ${WDIR}/model/dm/dtable.${L1}-${L2}
|
| 26 |
+
ptable.max-phrase-length = 7
|
| 27 |
+
# ptable.smoothing = --GoodTuring
|
| 28 |
+
# dmodels = wbe-mslr-bidirectional-fe-allff
|
| 29 |
+
|
| 30 |
+
LMODEL_ENTRIES = KENLM;name=KENLM0;order=5;factor=0;num-features=1;lazyken=0;path=$(abspath lm/europarl-v7.en.kenlm)
|
| 31 |
+
LMODELS = lm/europarl-v7.en.kenlm
|
| 32 |
+
|
| 33 |
+
MY_EXPERIMENT += $(call add_binary_phrase_table,0,0,5,${ptable})
|
| 34 |
+
$(eval $(call add_binary_phrase_table,0,0,5,${ptable}))
|
| 35 |
+
|
| 36 |
+
if 0
|
| 37 |
+
MY_EXPERIMENT += $(call add_binary_reordering_table,0,0,8,\
|
| 38 |
+
wbe-mslr-bidirectional-fe-allff,${dtable},${ptable})
|
| 39 |
+
$(eval $(call add_binary_reordering_table,0,0,8,\
|
| 40 |
+
wbe-mslr-bidirectional-fe-allff,${dtable},${ptable}))
|
| 41 |
+
endif
|
| 42 |
+
|
| 43 |
+
MY_EXPERIMENT += $(call create_moses_ini,${system})
|
| 44 |
+
$(eval $(call create_moses_ini,${system}))
|
| 45 |
+
|
| 46 |
+
#################################################################################
|
| 47 |
+
|
| 48 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/directory-structure.m4m
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# STANDARD LOCATIONS
|
| 4 |
+
basedir ?= $(CURDIR)
|
| 5 |
+
tune.dir ?= ${basedir}/tune
|
| 6 |
+
eval.dir ?= ${basedir}/eval
|
| 7 |
+
input.dir ?= ${basedir}/input
|
mosesdecoder/contrib/m4m/modules/obsolete/model-filtering.m4m
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
#
|
| 3 |
+
# This module deals with model filtering (if necessary).
|
| 4 |
+
# It produces the moses.ini files for filtered models for
|
| 5 |
+
# tuning and evaluation.
|
| 6 |
+
|
| 7 |
+
ifndef ${moses_ini_for_tuning}
|
| 8 |
+
moses_ini_for_tuning = # WHAT'S THE DEFAULT LOCATION FOR THIS IN EMS?
|
| 9 |
+
endif
|
| 10 |
+
|
| 11 |
+
ifndef ${moses_ini_for_eval}
|
| 12 |
+
moses_ini_for_eval = # WHAT'S THE DEFAULT LOCATION FOR THIS IN EMS?
|
| 13 |
+
endif
|
| 14 |
+
|
| 15 |
+
# filter models if suggested by set-up
|
| 16 |
+
ifneq (${moses_ini_for_tuning}, ${untuned_moses_ini})
|
| 17 |
+
${moses_ini_for_tuning}: | ${untuned_moses_ini}
|
| 18 |
+
${moses_ini_for_tuning}: | ${tuning_input_ready}
|
| 19 |
+
|
| 20 |
+
# phrase table in text format?
|
| 21 |
+
ifeq ($(shell grep -v '^ *\#' ${untuned_moses_ini} \
|
| 22 |
+
| grep -A1 '\[ttable-file\]' | tail -n +2 \
|
| 23 |
+
| head -n1 | awk '{print $$1}'),0)
|
| 24 |
+
# ADD PHRASE TABLE FILTERING COMMAND HERE
|
| 25 |
+
endif
|
| 26 |
+
|
| 27 |
+
# how does moses know if a lexicalized distortion table is binary or not?
|
| 28 |
+
# ADD LEXICAL DISTORTION TABLE FILTERING COMMAND HERE
|
| 29 |
+
|
| 30 |
+
ifneq (${moses_ini_for_eval),$(tuned_moses_ini))
|
| 31 |
+
# add code for model filtering for eval here
|
| 32 |
+
endif
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/phrase-table.make.scratch
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# .PHONY: $1
|
| 2 |
+
# $1: $1.binphr.idx
|
| 3 |
+
# $1.txt.gz: | L1text = $4
|
| 4 |
+
# $1.txt.gz: | L2text = $5
|
| 5 |
+
# $1.txt.gz: | symal = $6
|
| 6 |
+
# ${moses.ini}: $1
|
| 7 |
+
# PTABLES += 1;$2;$3;5;$1
|
| 8 |
+
# endef
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ${target}.tmp/fwd/scored.gz: | ${target}/phrase-extraction.DONE
|
| 12 |
+
# | ${L1File} ${L2File} ${symal}
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# # convert phrase table from text file to binary format
|
| 16 |
+
# %.binphr.idx: | %.txt.gz ${MOSES_BIN}/processPhraseTable
|
| 17 |
+
# $(lock)
|
| 18 |
+
# zcat -f $*.txt.gz | ${MOSES_BIN}/processPhraseTable \
|
| 19 |
+
# -ttable ${L1factors} ${L2factors} - -nscores 5 -out ${@D}/_${@F} \
|
| 20 |
+
# && mv ${@D}/_${@F} $@
|
| 21 |
+
# $(unlock)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# # directory definitions
|
| 25 |
+
# mo_mdl = model
|
| 26 |
+
# mo_tmp = model/tmp
|
| 27 |
+
# wrdaln = ${fstaln}/out
|
| 28 |
+
# # wrdaln should be set elsewhere!
|
| 29 |
+
|
| 30 |
+
# # milestone files created during phrase table construction
|
| 31 |
+
# ptable_bin = ${mo_mdl}/ptable.${L1}-${L2}
|
| 32 |
+
# ptable = ${mo_mdl}/ptable.${L1}-${L2}.txt.gz
|
| 33 |
+
# lex1given2 = ${mo_mdl}/${L1}-given-${L2}.lex.gz
|
| 34 |
+
# lex2given1 = ${mo_mdl}/${L2}-given-${L1}.lex.gz
|
| 35 |
+
# mosesinifile = ${mo_mdl}/moses.ini.0
|
| 36 |
+
|
| 37 |
+
# .PHONY: lex ptable
|
| 38 |
+
# lex: ${lex1given2} ${lex2given1}
|
| 39 |
+
# ptable: ${ptable_bin}
|
| 40 |
+
|
| 41 |
+
# # steps taken in this module
|
| 42 |
+
|
| 43 |
+
# # -------------------------------------------------------------------------------
|
| 44 |
+
# # --- STEP 1a: extract raw phrases from word-aligned corpus ---------------------
|
| 45 |
+
# # -------------------------------------------------------------------------------
|
| 46 |
+
# # Note: the script ${moses.extract-phrases} takes care of initial sorting
|
| 47 |
+
# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract-phrases}
|
| 48 |
+
# ${mo_tmp}/phrase-extraction.DONE: | ${moses.extract}
|
| 49 |
+
# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}.txt.gz
|
| 50 |
+
# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L2}.txt.gz
|
| 51 |
+
# ${mo_tmp}/phrase-extraction.DONE: | ${wrdaln}/${L1}-${L2}.symal.gz
|
| 52 |
+
# ${mo_tmp}/phrase-extraction.DONE:
|
| 53 |
+
# $(lock)
|
| 54 |
+
# ${moses.extract-phrases} \
|
| 55 |
+
# ${moses.extract} \
|
| 56 |
+
# ${wrdaln}/${L1}.txt.gz \
|
| 57 |
+
# ${wrdaln}/${L2}.txt.gz \
|
| 58 |
+
# ${wrdaln}/${L1}-${L2}.symal.gz \
|
| 59 |
+
# ${mo_tmp} ${max_phrase_length} \
|
| 60 |
+
# ${dmodel.type}-${dmodel.orientation} \
|
| 61 |
+
# && touch $@
|
| 62 |
+
# $(unlock)
|
| 63 |
+
|
| 64 |
+
# # -------------------------------------------------------------------------------
|
| 65 |
+
# # --- STEP 1a: extract word translation lexica from word-aligned corpus ---------
|
| 66 |
+
# # --- (for lexical phrase scoring) ---------
|
| 67 |
+
# # -------------------------------------------------------------------------------
|
| 68 |
+
# $(lex2given1): $(lex1given2)
|
| 69 |
+
# $(lex1given2): | ${wrdaln}/${L1}.txt.gz
|
| 70 |
+
# $(lex1given2): | ${wrdaln}/${L2}.txt.gz
|
| 71 |
+
# $(lex1given2): | ${wrdaln}/${L1}-${L2}.symal.gz
|
| 72 |
+
# $(lock)
|
| 73 |
+
# $(moses.make-lex) \
|
| 74 |
+
# ${wrdaln}/${L1}.txt.gz \
|
| 75 |
+
# ${wrdaln}/${L2}.txt.gz \
|
| 76 |
+
# ${wrdaln}/${L1}-${L2}.symal.gz \
|
| 77 |
+
# $(lex1given2) \
|
| 78 |
+
# $(lex2given1)
|
| 79 |
+
# $(unlock)
|
| 80 |
+
|
| 81 |
+
# # -------------------------------------------------------------------------------
|
| 82 |
+
# # --- STEP 2: score extracted phrase pairs --------------------------------------
|
| 83 |
+
# # -------------------------------------------------------------------------------
|
| 84 |
+
# ptfwdhalf = ${mo_tmp}/fwd/phrases.fwd.scored.gz
|
| 85 |
+
# ptbwdhalf = ${mo_tmp}/bwd/phrase-scoring.DONE
|
| 86 |
+
|
| 87 |
+
# # -------------------------------------------------------------------------------
|
| 88 |
+
# # --- STEP 2a: score phrases in the 'forward' direction -------------------------
|
| 89 |
+
# # -------------------------------------------------------------------------------
|
| 90 |
+
# $(ptfwdhalf): | ${mo_tmp}/phrase-extraction.DONE
|
| 91 |
+
# $(ptfwdhalf): | ${lex1given2}
|
| 92 |
+
# $(lock)
|
| 93 |
+
# $(merge-sorted) ${mo_tmp}/fwd/part.*.gz \
|
| 94 |
+
# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex1given2} ${@:.scored.gz=} \
|
| 95 |
+
# $(ptable.smoothing) && mv $@_ $@
|
| 96 |
+
# $(unlock)
|
| 97 |
+
|
| 98 |
+
# # -------------------------------------------------------------------------------
|
| 99 |
+
# # --- STEP 2b: score phrases in the 'backward' direction -------------------------
|
| 100 |
+
# # -------------------------------------------------------------------------------
|
| 101 |
+
# # Note: ${moses.score-phrases} re-sorts the scored backward phrases
|
| 102 |
+
# $(ptbwdhalf): | ${mo_tmp}/phrase-extraction.DONE
|
| 103 |
+
# $(ptbwdhalf): | ${lex2given1}
|
| 104 |
+
# $(lock)
|
| 105 |
+
# $(merge-sorted) ${mo_tmp}/bwd/part.*.gz \
|
| 106 |
+
# | ${moses.score-phrases} ${MOSES_BIN}/score - ${lex2given1} ${@D}/scored \
|
| 107 |
+
# "$(ptable.smoothing)" --Inverse && touch $@
|
| 108 |
+
# $(unlock)
|
| 109 |
+
|
| 110 |
+
# # -------------------------------------------------------------------------------
|
| 111 |
+
# # --- STEP 3: put the two phrase table halves together --------------------------
|
| 112 |
+
# # -------------------------------------------------------------------------------
|
| 113 |
+
# # ptfwdhalf is a single .gz file, ptbwdhalf is a collection .gz files
|
| 114 |
+
# $(ptable): | ${MOSES_BIN}/consolidate
|
| 115 |
+
# $(ptable): | $(ptfwdhalf) $(ptbwdhalf)
|
| 116 |
+
# $(lock)
|
| 117 |
+
# ${MOSES_BIN}/consolidate \
|
| 118 |
+
# <(zcat ${ptfwdhalf}) \
|
| 119 |
+
# <(${merge-sorted} ${mo_tmp}/bwd/scored.*.gz) /dev/stdout \
|
| 120 |
+
# $(if $(ptable.smoothing), \
|
| 121 |
+
# $(ptable.smoothing) $(ptfwdhalf:.sorted.gz=.coc)) \
|
| 122 |
+
# | gzip > $@_ && mv $@_ $@
|
| 123 |
+
# $(unlock)
|
| 124 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/reporting.m4m
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
rset = set=$2,type=$3,file=evaluation/$1/$2.$3
|
| 4 |
+
analyses = $(foreach e, ${eval-sets}, \
|
| 5 |
+
$(call rset,$1,$e,analysis-precision) \
|
| 6 |
+
$(call rset,$1,$e,analysis-coverage))
|
| 7 |
+
eval-scores = $(foreach e, ${eval-sets}, \
|
| 8 |
+
$(foreach m, ${eval-metrics}, \
|
| 9 |
+
$(call rset,$1,$e,$m)))
|
| 10 |
+
eval-results = $(foreach e, ${eval-sets}, \
|
| 11 |
+
$(foreach m, ${eval-metrics}, \
|
| 12 |
+
evaluation/$1/$e.$m))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
.SECONDEXPANSION:
|
| 16 |
+
# NOTA BENE: setup-experiments.make adds additional dependencies for
|
| 17 |
+
# evaluation/%/report in the file experiments.make!
|
| 18 |
+
evaluation/%/report: sets = $(call eval-scores,$*)
|
| 19 |
+
#evaluation/%/report: sets += $(call analyses,$*)
|
| 20 |
+
#evaluation/%/report: tuned_moses_ini := $(if ${have_tuned_moses_ini},${have_tuned_moses_ini},tuning/$*/moses.tuned.ini)
|
| 21 |
+
evaluation/%/report: prereqs = $(call eval-results,$*)
|
| 22 |
+
evaluation/%/report: $$(prereqs)
|
| 23 |
+
echo $(foreach s, ${sets}, $s) $^
|
| 24 |
+
mkdir $@.lock
|
| 25 |
+
echo $(call lockline) > $@.lock/owner
|
| 26 |
+
${report} ${sets} > $@_
|
| 27 |
+
mv $@_ $@
|
| 28 |
+
rm $@.lock/owner
|
| 29 |
+
rmdir $@.lock
|
| 30 |
+
|
| 31 |
+
%.analysis: params1 = -input ${$(notdir $*)-src}
|
| 32 |
+
%.analysis: params1 += -input-corpus ${crp_train}.${L1}
|
| 33 |
+
%.analysis: params1 += -ttable ${ttable} -dir $@
|
| 34 |
+
%.analysis: params2 = -precision-by-coverage
|
| 35 |
+
%.analysis: params2 += -reference ${$(notdir $*)-ref}
|
| 36 |
+
%.analysis: params2 += -system $*.truecased
|
| 37 |
+
%.analysis: params2 += -segmentation $*.output
|
| 38 |
+
%.analysis: params2 += -system-alignment $*.output.wa
|
| 39 |
+
%.analysis: params2 += -coverage $@
|
| 40 |
+
%.analysis: | ${ttable} ${crp_train}.${L1}
|
| 41 |
+
%.analysis: %.output.wa %.output %.truecased
|
| 42 |
+
@echo ANALYSING $^
|
| 43 |
+
@mkdir $@.lock
|
| 44 |
+
@echo $(call lockline) > $@.lock/owner
|
| 45 |
+
${analyze} ${params1}
|
| 46 |
+
${analyze} ${params1} ${params2}
|
| 47 |
+
@rm$@.lock/owner
|
| 48 |
+
@rmdir $@.lock
|
| 49 |
+
|
| 50 |
+
%.multi-bleu: %.cleaned
|
| 51 |
+
$(info )
|
| 52 |
+
$(info RUNNING MULTI-BLEU on $^)
|
| 53 |
+
@mkdir $@.lock
|
| 54 |
+
@echo $(call lockline) > $@.lock/owner
|
| 55 |
+
${multi-bleu} ${$(notdir $*)-ref} < $< > $@_
|
| 56 |
+
@mv $@_ $@
|
| 57 |
+
@rm $@.lock/owner
|
| 58 |
+
@rmdir $@.lock
|
| 59 |
+
|
| 60 |
+
%.truecased: %.cleaned
|
| 61 |
+
mkdir $@.lock
|
| 62 |
+
$(detruecase) < $< > $@_
|
| 63 |
+
mv $@_ $@
|
| 64 |
+
rmdir $@.lock
|
| 65 |
+
|
| 66 |
+
%.cleaned: %.output
|
| 67 |
+
$(info )
|
| 68 |
+
$(info CLEANING UP DECODER OUTPUT: $<)
|
| 69 |
+
$(info )
|
| 70 |
+
mkdir $@.lock
|
| 71 |
+
echo $(call lockline) > $@.lock/owner
|
| 72 |
+
$(clean-decoder-output) < $< > $@_
|
| 73 |
+
mv $@_ $@
|
| 74 |
+
rm $@.lock/owner
|
| 75 |
+
rmdir $@.lock
|
| 76 |
+
|
| 77 |
+
%.output.wa: %.output
|
| 78 |
+
evaluation/%.output: decoder_flags += -threads ${moses.threads} -v 0
|
| 79 |
+
evaluation/%.output: decoder_flags += -inputtype ${input-type}
|
| 80 |
+
evaluation/%.output: decoder_flags += -alignment-output-file $@.wa
|
| 81 |
+
evaluation/%.output: decoder_flags += -t -text-type "test"
|
| 82 |
+
evaluation/%.output: decoder_flags += -f ${moses_ini}
|
| 83 |
+
evaluation/%.output: input = ${$(notdir $*)-src}
|
| 84 |
+
evaluation/%.output:
|
| 85 |
+
echo MOSES_INI = ${moses_ini}
|
| 86 |
+
@mkdir -p $(@D)
|
| 87 |
+
@mkdir $@.lock
|
| 88 |
+
@echo $(call lockline) > $@.lock/owner
|
| 89 |
+
${decode} ${decoder_flags} < ${input} > $@_
|
| 90 |
+
@mv $@_ $@
|
| 91 |
+
@rm $@.lock/owner
|
| 92 |
+
@rmdir $@.lock
|
| 93 |
+
|
| 94 |
+
.SECONDARY:
|
| 95 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/run-moses.m4m
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# This make module deals with running the moses decoder.
|
| 4 |
+
# It sets default parameters and checks that parameters that
|
| 5 |
+
# need to be set elsewhere are actually set.
|
| 6 |
+
|
| 7 |
+
# The following parameters are translation-job specific and need to be set
|
| 8 |
+
# explicitly for each job.
|
| 9 |
+
|
| 10 |
+
moses.threads ?= 4
|
| 11 |
+
moses.flags += -threads ${moses.threads}
|
| 12 |
+
moses.flags += -v 0 -t -text-type "test"
|
| 13 |
+
|
| 14 |
+
%.moses-out.wa: moses.flags += -alignment-output-file $*.output.wa
|
| 15 |
+
%.moses-out.wa: %.moses-out
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
.SECONDEXPANSION:
|
| 19 |
+
%.moses-out:
|
| 20 |
+
echo MOSES $^
|
| 21 |
+
$(checkvar,moses.input)
|
| 22 |
+
$(checkvar,moses.ini)
|
| 23 |
+
$(lock)
|
| 24 |
+
${moses} -i ${moses.input} -inputtype ${moses.inputtype} \
|
| 25 |
+
-f ${moses.ini} ${moses.flags} > $@_ && mv $@_ $@
|
| 26 |
+
$(unlock)
|
| 27 |
+
|
| 28 |
+
%.cleaned: %.moses-out
|
| 29 |
+
$(lock)
|
| 30 |
+
$(clean-decoder-output) < $< > $@_ && mv $@_ $@
|
| 31 |
+
$(unlock)
|
| 32 |
+
|
| 33 |
+
%.natcased: %.cleaned
|
| 34 |
+
$(eval $(call lock))
|
| 35 |
+
$(detruecase) < $*.cleaned > $@_ && mv $@_ $@
|
| 36 |
+
$(eval $(call unlock))
|
| 37 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/setup-experiments.m4m
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# This make module sets up the actual experiments
|
| 4 |
+
|
| 5 |
+
L1 = fr
|
| 6 |
+
L2 = en
|
| 7 |
+
tune-ref-ready = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/tuning/reference.tc.18
|
| 8 |
+
eval-ref-ready = /fs/saxnot5/germann/accept/homophones/exp.new/evaluation/201201_devtest_b.reference.tok.1
|
| 9 |
+
crp_train = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/training/corpus.19
|
| 10 |
+
ttable = /fs/sif0/bhaddow/experiments/accept/symantec-baseline/model/phrase-table.10
|
| 11 |
+
|
| 12 |
+
untuned_moses_ini = model/moses.ini.0
|
| 13 |
+
fixed-iweight = --activate-feature d_0,d_1,d_2,d_3,d_4,d_5,d_6,lm_0,w_0,tm_0,tm_1,tm_2,tm_3,tm_4
|
| 14 |
+
|
| 15 |
+
# list the evaluation metrics to be used for evaluation
|
| 16 |
+
# TO DO: list available metrics
|
| 17 |
+
eval-metrics = multi-bleu
|
| 18 |
+
moses-threads = 20
|
| 19 |
+
tuning-runs = $(shell seq 25)
|
| 20 |
+
|
| 21 |
+
# experiments.make: WSCHEMES = uniform unigram bigram bigram2
|
| 22 |
+
# experiments.make: DATASETS = tune eval
|
| 23 |
+
# experiments.make: PREPROC = baseline uniq multi
|
| 24 |
+
# experiments.make: CSETS = unfiltered filtered edited
|
| 25 |
+
experiments.make: WSCHEMES = bigram2
|
| 26 |
+
experiments.make: DATASETS = tune eval
|
| 27 |
+
experiments.make: PREPROC = baseline
|
| 28 |
+
experiments.make: CSETS = filtered
|
| 29 |
+
# remake experiments.make if this file changes
|
| 30 |
+
experiments.make: $(word $(words ${MAKEFILE_LIST}), ${MAKEFILE_LIST})
|
| 31 |
+
experiments.make:
|
| 32 |
+
mkdir $@.lock
|
| 33 |
+
echo $(call lockline) > $@.lock/owner
|
| 34 |
+
echo '# -*- Makefile -*-' > $@_
|
| 35 |
+
echo '# This file was automatically generated by setup-experiments.make.' >> $@_
|
| 36 |
+
echo 'experiments := ' >> $@_;
|
| 37 |
+
$(foreach p, ${PREPROC}, \
|
| 38 |
+
echo '# NEW EXPERIMENT #####################################' >> $@_; \
|
| 39 |
+
echo 'experiments += $p' >> $@_; \
|
| 40 |
+
echo 'ctr = $$(words $${experiments})' >> $@_; \
|
| 41 |
+
echo '$p: input-type = 0' >> $@_; \
|
| 42 |
+
echo '$p: eval-sets = $p.eval' >> $@_; \
|
| 43 |
+
echo '$p: tune-src = input/$p.tune.tc' >> $@_; \
|
| 44 |
+
echo '$p: tune-ref = ${tune-ref-ready}' >> $@_; \
|
| 45 |
+
echo '$p: $p.eval-src = input/$p.eval.tc' >> $@_; \
|
| 46 |
+
echo '$p: $p.eval-ref = ${eval-ref-ready}' >> $@_; \
|
| 47 |
+
echo '$p: evaluation/$${ctr}/report' >> $@_; \
|
| 48 |
+
echo >> $@_; \
|
| 49 |
+
echo 'evaluation/$p/%/$p.eval.output: input = input/$p.eval.tc' >> $@_; \
|
| 50 |
+
echo 'evaluation/$p/%/$p.eval.output: input/$p.eval.tc' >> $@_; \
|
| 51 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 52 |
+
'evaluation/$${ctr}/$p.eval.output: ${tuned_moses_ini}', \
|
| 53 |
+
'evaluation/$${ctr}/$p.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 54 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 55 |
+
'evaluation/$${ctr}/$p.eval.output: moses_ini := ${tuned_moses_ini}', \
|
| 56 |
+
'evaluation/$${ctr}/$p.eval.output: moses_ini := tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 57 |
+
echo 'evaluation/$${ctr}/$p.eval.multi-bleu: $${$p.eval-ref}' >> $@_; \
|
| 58 |
+
echo >> $@_;)
|
| 59 |
+
$(foreach c, ${CSETS}, \
|
| 60 |
+
$(foreach p, ${PREPROC}, \
|
| 61 |
+
$(foreach w, ${WSCHEMES}, \
|
| 62 |
+
echo '# NEW EXPERIMENT #####################################' >> $@_; \
|
| 63 |
+
echo 'experiments += $w-$c-$p' >> $@_; \
|
| 64 |
+
echo 'ctr = $$(words $${experiments})' >> $@_; \
|
| 65 |
+
echo '$w-$c-$p: input-type = 1' >> $@_; \
|
| 66 |
+
echo '$w-$c-$p: eval-sets = $w-$c-$p.eval' >> $@_; \
|
| 67 |
+
echo '$w-$c-$p: tune-src = input/$w-$c-$p.tune.cfn' >> $@_; \
|
| 68 |
+
echo '$w-$c-$p: tune-ref = ${tune-ref-ready}' >> $@_; \
|
| 69 |
+
echo '$w-$c-$p: $w-$c-$p.eval-src = input/$w-$c-$p.eval.cfn' >> $@_; \
|
| 70 |
+
echo '$w-$c-$p: $w-$c-$p.eval-ref = ${eval-ref-ready}' >> $@_; \
|
| 71 |
+
echo '$w-$c-$p: evaluation/$${ctr}/report' >> $@_; \
|
| 72 |
+
echo >> $@_; \
|
| 73 |
+
echo 'evaluation/$${ctr}/$w-$c-$p.eval.output: input = input/$w-$c-$p.eval.cfn' >> $@_; \
|
| 74 |
+
echo 'evaluation/$${ctr}/$w-$c-$p.eval.output: input/$w-$c-$p.eval.cfn' >> $@_; \
|
| 75 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 76 |
+
'evaluation/$${ctr}/$w-$c-$p.eval.output: ${tuned_moses_ini}', \
|
| 77 |
+
'evaluation/$${ctr}/$w-$c-$p.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 78 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 79 |
+
'evaluation/$${ctr}/$w-$c-$p.eval.output: moses_ini := ${tuned_moses_ini}', \
|
| 80 |
+
'evaluation/$${ctr}/$w-$c-$p.eval.output: moses_ini := tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 81 |
+
echo 'evaluation/$${ctr}/$w-$c-$p.eval.multi-bleu: $${$w-$c-$p.eval-ref}' >> $@_; \
|
| 82 |
+
echo >> $@_;\
|
| 83 |
+
$(foreach d, tune eval, \
|
| 84 |
+
echo 'cfn-targets += input/$w-$c-$p.$d.cfn' >> $@_; \
|
| 85 |
+
echo 'input/$w-$c-$p.$d.cfn: input/$p.$d.tc' >> $@_; \
|
| 86 |
+
printf '\t@mkdir $$@.lock\n\t@echo $$(call lockline) > $$@.lock/owner\n' >> $@_; \
|
| 87 |
+
printf '\tcreate-confusion-network.01.exe -q -w $w -s csets/csets.$c.txt -c ../mm/fr < $$< > $$@_ && mv $$@_ $$@\n' >> $@_;\
|
| 88 |
+
printf '\t@rm $$@.lock/owner\n\t@rmdir $$@.lock\n' >> $@_;))))
|
| 89 |
+
echo '.PHONY += $$(experiments) cfn' >> $@_
|
| 90 |
+
echo 'cfns: $${cfn-targets}' >> $@_
|
| 91 |
+
@mv $@_ $@
|
| 92 |
+
@rm $@.lock/owner
|
| 93 |
+
@rmdir $@.lock
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# # echo 'ctr = $$(words $${experiments})' >> $@_; \
|
| 98 |
+
# echo 'eval-sets = $w-$c-$p.eval' >> $@_; \
|
| 99 |
+
# echo 'rx := $$(call report-prereqs,$${ctr},$${eval-sets})' >> $@_; \
|
| 100 |
+
# echo '$w-$c-$p: run-id := $${ctr}' >> $@_; \
|
| 101 |
+
# echo '$w-$c-$p: tune-input = input/$w-$c-$p.tune.cfn' >> $@_; \
|
| 102 |
+
# echo '$w-$c-$p: tune-src = input/$w-$c-$p.tune.cfn' >> $@_; \
|
| 103 |
+
# echo '$w-$c-$p: tune-ref = ${tune-ref-ready}' >> $@_; \
|
| 104 |
+
# echo '$w-$c-$p: $w-$c-$p.eval-src = input/$w-$c-$p.eval.cfn' >> $@_; \
|
| 105 |
+
# echo '$w-$c-$p: $w-$c-$p.eval-ref = ${eval-ref-ready}' >> $@_; \
|
| 106 |
+
# echo '$w-$c-$p: input-type = 1' >> $@_; \
|
| 107 |
+
# echo '$w-$c-$p: mert.options += $$(if $$(findstring uniform,$w),${fixed-iweight})' >> $@_; \
|
| 108 |
+
# echo '$w-$c-$p: evaluation/report.$${ctr}' >> $@_; \
|
| 109 |
+
# echo >> $@_; \
|
| 110 |
+
# echo 'evaluation/$w-$c-$p.eval.output.$${ctr}: input = input/$w-$c-$p.eval.cfn' >> $@_; \
|
| 111 |
+
# echo >> $@_; \
|
| 112 |
+
# $(foreach d, tune eval, \
|
| 113 |
+
# ofile=input/$w-$c-$p.$d.cfn; \
|
| 114 |
+
# ifile=input/$p.$d.tc; \
|
| 115 |
+
# echo "$$ofile: $$ifile" >> $@_ ; \
|
| 116 |
+
# printf '\t create-confusion-network.01.exe -w $w -s csets/cset.$c.txt -c ../mm/fr < $$< > $$@_ && mv $$@_ $$@\n' >> $@_ ; \
|
| 117 |
+
# echo >> $@_; ))))
|
| 118 |
+
# echo '.PHONY += $$(experiments)' >> $@_
|
| 119 |
+
# @mv $@_ $@
|
| 120 |
+
# @rm $@.lock/owner
|
| 121 |
+
# @rmdir $@.lock
|
mosesdecoder/contrib/m4m/modules/obsolete/skip-steps.mak
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# Specify in this file resources that you already have
|
| 4 |
+
run_id ?= 0
|
| 5 |
+
|
| 6 |
+
untuned_moses_ini := model/moses.ini.0
|
| 7 |
+
moses_ini_for_tuning = ${untuned_moses_ini}
|
| 8 |
+
moses_ini_for_eval = ${tuned_moses_ini}
|
| 9 |
+
|
| 10 |
+
# Notes:
|
| 11 |
+
#
|
| 12 |
+
# - if ${moses_ini_for_tuning} is different from ${untuned_mose_ini}, the phrase table and the
|
| 13 |
+
# lexical distortion table will be filtered for tuning (see tune.make)
|
| 14 |
+
# - if ${moses_ini_for_eval} is different from ${tuned_mose_ini}, the phrase table and the
|
| 15 |
+
# lexical distortion table will be filtered for evaluation (see eval.make)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
all:
|
| 19 |
+
echo ";$(foo);"
|
mosesdecoder/contrib/m4m/modules/obsolete/system.m4m
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
# This module defines the actual system
|
| 4 |
+
|
| 5 |
+
# Choose names for translation and distortion model
|
| 6 |
+
ptable = model/tm/ptable.${L1}-${L2}
|
| 7 |
+
dtable = model/dm/dtable.${L1}-${L2}
|
| 8 |
+
|
| 9 |
+
# specify the underlying corpus
|
| 10 |
+
pll.txt1 ?= crp/trn/aln/${word-alignment}/${L1}.txt.gz
|
| 11 |
+
pll.txt2 ?= crp/trn/aln/${word-alignment}/${L2}.txt.gz
|
| 12 |
+
pll.aln ?= crp/trn/aln/${word-alignment}/${L1}-${L2}.symal.gz
|
| 13 |
+
|
| 14 |
+
# specify the distortion model parameters; we bunch them
|
| 15 |
+
# all together in one string
|
| 16 |
+
${ptable}: dmodels = wbe-mslr-bidirectional-fe-allff
|
| 17 |
+
|
| 18 |
+
# phrase table parameters: maximum phrase length and smoothing
|
| 19 |
+
ptable.max-phrase-length = 7
|
| 20 |
+
ptable.smoothing = --GoodTuring
|
| 21 |
+
|
| 22 |
+
#$(info $(call add_binary_phrase_table,0,0,5,${ptable},info))
|
| 23 |
+
$(eval $(call add_binary_phrase_table,0,0,5,${ptable}))
|
| 24 |
+
|
| 25 |
+
$(eval $(call add_binary_reordering_table,\
|
| 26 |
+
0-0,wbe-mslr-bidirectional-fe-allff,6,${dtable},${ptable}))
|
| 27 |
+
|
| 28 |
+
$(info $(call add_binary_reordering_table,\
|
| 29 |
+
0-0,wbe-mslr-bidirectional-fe-allff,6,${dtable},${ptable},info))
|
| 30 |
+
|
| 31 |
+
# below: moses.ini.0 is the moses ini file PRE-TUNING!
|
| 32 |
+
define build_system
|
| 33 |
+
$1/moses.ini.0
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
makefile:
|
| 37 |
+
$(info $(call add_binary_phrase_table,0,0,5,${ptable},info))
|
| 38 |
+
|
mosesdecoder/contrib/m4m/modules/obsolete/template.m4m
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
|
| 3 |
+
define setup =
|
| 4 |
+
echo 'experiments := ' >> $@_; \
|
| 5 |
+
$(foreach p, ${PREPROC}, \
|
| 6 |
+
echo '# NEW EXPERIMENT #####################################' >> $@_; \
|
| 7 |
+
echo 'experiments += ${tag}' >> $@_; \
|
| 8 |
+
echo 'ctr = $$(words $${experiments})' >> $@_; \
|
| 9 |
+
echo '$: input-type = $(2)' >> $@_; \
|
| 10 |
+
echo '${tag}: eval-sets = ${tag}.eval' >> $@_; \
|
| 11 |
+
echo '${tag}: tune-src = input/${tag}.tune.tc' >> $@_; \
|
| 12 |
+
echo '${tag}: tune-ref = ${tune-ref-ready}' >> $@_; \
|
| 13 |
+
echo '${tag}: ${tag}.eval-src = input/${tag}.eval.$(if $(findstring 1,$(2),cfn,tc))' >> $@_; \
|
| 14 |
+
echo '${tag}: ${tag}.eval-ref = ${eval-ref-ready}' >> $@_; \
|
| 15 |
+
echo '${tag}: evaluation/$${ctr}/report' >> $@_; \
|
| 16 |
+
$(foreach e, ${tag}.eval, \
|
| 17 |
+
$(foreach m, ${eval-metrics}, \
|
| 18 |
+
echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.$m' >> $@_;) \
|
| 19 |
+
echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.analysis' >> $@_;) \
|
| 20 |
+
echo >> $@_; \
|
| 21 |
+
echo 'evaluation/$${ctr}/${tag}.eval.output: input = input/${tag}.eval.tc' >> $@_; \
|
| 22 |
+
echo 'evaluation/$${ctr}/${tag}.eval.output: input/${tag}.eval.tc' >> $@_; \
|
| 23 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 24 |
+
'evaluation/$${ctr}/${tag}.eval.output: ${tuned_moses_ini}', \
|
| 25 |
+
'evaluation/$${ctr}/${tag}.eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 26 |
+
echo 'evaluation/$${ctr}/${tag}.eval.multi-bleu: $${${tag}.eval-ref}' >> $@_; \
|
| 27 |
+
echo >> $@_;)
|
| 28 |
+
echo '.PHONY += $$(experiments)' >> $@_
|
| 29 |
+
@mv $@_ $@
|
| 30 |
+
@rm $@.lock/owner
|
| 31 |
+
@rmdir $@.lock
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# $(1): system / input processing
|
| 35 |
+
# $(2): input type (cfn or text)
|
| 36 |
+
define setup_experiment =
|
| 37 |
+
echo 'experiments := ' >> $@_; \
|
| 38 |
+
$(foreach p, ${PREPROC}, \
|
| 39 |
+
echo '# NEW EXPERIMENT #####################################' >> $@_; \
|
| 40 |
+
echo 'experiments += $(1)' >> $@_; \
|
| 41 |
+
echo 'ctr = $$(words $${experiments})' >> $@_; \
|
| 42 |
+
echo '$(1): input-type = $(2)' >> $@_; \
|
| 43 |
+
echo '$(1): eval-sets = $(1).eval' >> $@_; \
|
| 44 |
+
echo '$(1): tune-src = input/$(1).tune.tc' >> $@_; \
|
| 45 |
+
echo '$(1): tune-ref = ${tune-ref-ready}' >> $@_; \
|
| 46 |
+
echo '$(1): $(1).eval-src = input/$(1).eval.$(if $(findstring 1,$(2),cfn,tc))' >> $@_; \
|
| 47 |
+
echo '$(1): $(1).eval-ref = ${eval-ref-ready}' >> $@_; \
|
| 48 |
+
echo '$(1): evaluation/$${ctr}/report' >> $@_; \
|
| 49 |
+
$(foreach e, $(1).eval, \
|
| 50 |
+
$(foreach m, ${eval-metrics}, \
|
| 51 |
+
echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.$m' >> $@_;) \
|
| 52 |
+
echo 'evaluation/$${ctr}/report: evaluation/$${ctr}/$e.analysis' >> $@_;) \
|
| 53 |
+
echo >> $@_; \
|
| 54 |
+
echo 'evaluation/$${ctr}/$(1).eval.output: input = input/$(1).eval.tc' >> $@_; \
|
| 55 |
+
echo 'evaluation/$${ctr}/$(1).eval.output: input/$(1).eval.tc' >> $@_; \
|
| 56 |
+
echo $(if $(findstring ini,${tuned_moses_ini}), \
|
| 57 |
+
'evaluation/$${ctr}/$(1).eval.output: ${tuned_moses_ini}', \
|
| 58 |
+
'evaluation/$${ctr}/$(1).eval.output: tuning/$${ctr}/moses.tuned.ini') >> $@_; \
|
| 59 |
+
echo 'evaluation/$${ctr}/$(1).eval.multi-bleu: $${$(1).eval-ref}' >> $@_; \
|
| 60 |
+
echo >> $@_;)
|
| 61 |
+
echo '.PHONY += $$(experiments)' >> $@_
|
| 62 |
+
@mv $@_ $@
|
| 63 |
+
@rm $@.lock/owner
|
| 64 |
+
@rmdir $@.lock
|
| 65 |
+
|
| 66 |
+
endef
|
mosesdecoder/contrib/m4m/modules/obsolete/tune.m4m
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- Makefile -*-
|
| 2 |
+
# make module for tuning a system
|
| 3 |
+
|
| 4 |
+
#tune.input ?= $(error missing
|
| 5 |
+
#tuned_moses_ini ?= tuning/moses.ini.${run_id}
|
| 6 |
+
#$(tuned_moses_ini): | ${untuned_moses_ini}
|
| 7 |
+
#$(tuned_moses_ini): | ${untuned_moses_ini}
|
| 8 |
+
|
| 9 |
+
# make sure that all necessary variables are set
|
| 10 |
+
untuned_moses_ini ?= $(error Fatal error: the required variable untuned_moses_ini is not set)
|
| 11 |
+
tuning_input ?= $(error Fatal error: the required variable tuning_input is not set)
|
| 12 |
+
tuning_reference ?= $(error Fatal error: the required variable tuning_reference is not set)
|
| 13 |
+
tuning_itype ?= $(error Fatal error: the required variable tuning_itype is not set)
|
| 14 |
+
tuning_wdir ?= $(error Fatal error: the required variable tuning_wdir is not set)
|
| 15 |
+
|
| 16 |
+
$tuning_root_dir ?= ${MOSES_ROOT}
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# default tuning parameters
|
| 21 |
+
mert.nbest ?= 100
|
| 22 |
+
mert.decoder-threads ?= 4
|
| 23 |
+
tuning/%/tmp/moses.ini: mertcmd =
|
| 24 |
+
tuning/%/tmp/moses.ini: mert_flags += --working-dir $(CURDIR)/tuning/$*/tmp
|
| 25 |
+
tuning/%/tmp/moses.ini: mert_flags += --decoder-flags "${mert.decoder_flags} -inputtype ${input-type}"
|
| 26 |
+
tuning/%/tmp/moses.ini: mert_flags += --rootdir ${MOSES_ROOT}/scripts
|
| 27 |
+
tuning/%/tmp/moses.ini: mert_flags += --mertdir ${MOSES_BIN}
|
| 28 |
+
tuning/%/tmp/moses.ini: mert_flags += ${mert.options}
|
| 29 |
+
tuning/%/tmp/moses.ini: ${untuned_moses_ini}
|
| 30 |
+
$(info TUNING: ${tune} ${tune-src} ${tune-ref} ${decode} ${untuned_moses_ini} ${mert_flags})
|
| 31 |
+
@mkdir -p $(@D)
|
| 32 |
+
@mkdir $@.lock
|
| 33 |
+
@echo $(call lockline) > $@.lock/owner
|
| 34 |
+
${tune} ${mert_flags} ${tune-src} ${tune-ref} ${decode} ${untuned_moses_ini}
|
| 35 |
+
@rm $@.lock/owner
|
| 36 |
+
@rmdir $@.lock
|
| 37 |
+
|
| 38 |
+
tuning/%/moses.tuned.ini: tuning/%/tmp/moses.ini
|
| 39 |
+
@mkdir -p $(@D)
|
| 40 |
+
@mkdir $@.lock
|
| 41 |
+
@echo $(call lockline) > $@.lock/owner
|
| 42 |
+
${apply-weights} tuning/$*/tmp/moses.ini < ${untuned_moses_ini} > $@_
|
| 43 |
+
@mv $@_ $@
|
| 44 |
+
@rm $@.lock/owner
|
| 45 |
+
@rmdir $@.lock
|
mosesdecoder/contrib/m4m/scripts/fast-align2bal.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# Auxiliary script to convert fast_align output to the "bal" input format
|
| 3 |
+
# that symal requires.
|
| 4 |
+
# Script by Ulrich Germann.
|
| 5 |
+
|
| 6 |
+
# command line args:
|
| 7 |
+
# <L1 plain text> <L2 plain text> <L1-L2 alignments> <L2-L1 alignments>
|
| 8 |
+
#
|
| 9 |
+
# TO DO: - proper argument parsing with getopt
|
| 10 |
+
# - help text
|
| 11 |
+
|
| 12 |
+
import sys,os
|
| 13 |
+
|
| 14 |
+
(T1,T2,fwd,bwd) = [open(x) for x in sys.argv[1:]]
|
| 15 |
+
|
| 16 |
+
def alnvec(slen,alinks,mode):
|
| 17 |
+
d = dict([[int(x[mode]),int(x[(mode+1)%2])+1] for x
|
| 18 |
+
in [y.split('-') for y in alinks]])
|
| 19 |
+
return [d.get(i,0) for i in xrange(slen)]
|
| 20 |
+
|
| 21 |
+
ctr = 0
|
| 22 |
+
for t1 in T1:
|
| 23 |
+
t1 = t1.strip().split()
|
| 24 |
+
t2 = T2.readline().strip().split()
|
| 25 |
+
a1 = alnvec(len(t1),bwd.readline().split(),0)
|
| 26 |
+
a2 = alnvec(len(t2),fwd.readline().split(),1)
|
| 27 |
+
print 1
|
| 28 |
+
print len(t2), " ".join(t2), '#', " ".join(["%d"%x for x in a2])
|
| 29 |
+
print len(t1), " ".join(t1), '#', " ".join(["%d"%x for x in a1])
|
| 30 |
+
ctr += 1
|
| 31 |
+
pass
|
mosesdecoder/contrib/m4m/scripts/giza.txt2snt.sh
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Wrapper script around plain2snt that allows us to generate the numberized
|
| 3 |
+
# files from gzipped text files via named pipes. (c) 2011-2012 Ulrich Germann
|
| 4 |
+
|
| 5 |
+
fail()
|
| 6 |
+
{
|
| 7 |
+
echo $@
|
| 8 |
+
exit 1
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
on_term()
|
| 12 |
+
{
|
| 13 |
+
rm $odir/${L1}
|
| 14 |
+
rm $odir/${L2}
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
trap 'on_term' TERM EXIT QUIT INT 0
|
| 18 |
+
|
| 19 |
+
if [ $# -lt 4 ]; then
|
| 20 |
+
fail "usage: $0 <txtdir> <L1> <L2> <odir>"
|
| 21 |
+
fi
|
| 22 |
+
|
| 23 |
+
txtdir=$1
|
| 24 |
+
L1=$2
|
| 25 |
+
L2=$3
|
| 26 |
+
odir=$4
|
| 27 |
+
|
| 28 |
+
mkdir -p $odir
|
| 29 |
+
mkfifo $odir/${L1} || exit 1
|
| 30 |
+
mkfifo $odir/${L2} || exit 1
|
| 31 |
+
|
| 32 |
+
find -L ${txtdir} -name "*.${L1}" -or -name "*.${L1}.gz" | sort | xargs zcat -f > $odir/${L1} &
|
| 33 |
+
find -L ${txtdir} -name "*.${L2}" -or -name "*.${L2}.gz" | sort | xargs zcat -f > $odir/${L2} &
|
| 34 |
+
|
| 35 |
+
pushd $odir
|
| 36 |
+
plain2snt ${L1} ${L2}
|
| 37 |
+
wait
|
| 38 |
+
mv ${L1}_${L2}.snt ${L1}-${L2}.snt
|
| 39 |
+
mv ${L2}_${L1}.snt ${L2}-${L1}.snt
|
| 40 |
+
wait
|
| 41 |
+
popd
|
mosesdecoder/contrib/m4m/scripts/moses.extract-phrases.sh
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# helper script for phrase extraction
|
| 3 |
+
# (c) 2011-2012 Ulrich Germann
|
| 4 |
+
# txtdir - directory with gzipped plain text files
|
| 5 |
+
# sntdir - directory with files in Giza's .snt format, also including the .OK files
|
| 6 |
+
# produced by giza.txt2snt.sh
|
| 7 |
+
# gizdir - directory where aligned corpus resides
|
| 8 |
+
# L1,L2 - language tags for L1,L2
|
| 9 |
+
# plmax - max phrase length to be extraced
|
| 10 |
+
|
| 11 |
+
extractor=$1
|
| 12 |
+
L1_text=$2
|
| 13 |
+
L2_text=$3
|
| 14 |
+
aln=$4
|
| 15 |
+
odir=$5
|
| 16 |
+
max_plen=$6
|
| 17 |
+
dmodel=$7
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
echo $#
|
| 21 |
+
if [ $# -lt 6 ] ; then
|
| 22 |
+
echo <<EOF \
|
| 23 |
+
"usage: $0 <moses-extract-command> <L1 text> <L2 text> <alignment file> <output dir> <max phrase length> <distortion-model>"
|
| 24 |
+
EOF
|
| 25 |
+
exit 1
|
| 26 |
+
fi
|
| 27 |
+
|
| 28 |
+
fifo=$odir/fifo.$$
|
| 29 |
+
|
| 30 |
+
cleanup()
|
| 31 |
+
{
|
| 32 |
+
if [ -e $fifo ] ; then rm $fifo; fi
|
| 33 |
+
if [ -e $fifo.inv ] ; then rm $fifo.inv; fi
|
| 34 |
+
if [ -e $fifo.o ] ; then rm $fifo.o; fi
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
trap 'cleanup' 0
|
| 38 |
+
export LC_ALL=C
|
| 39 |
+
mkdir -p $odir/fwd $odir/bwd $odir/dst
|
| 40 |
+
mkfifo $fifo
|
| 41 |
+
parallel < $fifo -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/fwd/part.{#}.gz" &
|
| 42 |
+
mkfifo $fifo.inv
|
| 43 |
+
parallel < $fifo.inv -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/bwd/part.{#}.gz" &
|
| 44 |
+
if [ "$dmodel" != "" ] ; then
|
| 45 |
+
mkfifo $fifo.o
|
| 46 |
+
parallel < $fifo.o -j6 --pipe --blocksize 250M "sort -S 5G | gzip > $odir/dst/part.{#}.gz" &
|
| 47 |
+
dmodel="orientation --model $dmodel"
|
| 48 |
+
fi
|
| 49 |
+
#echo "($extractor <(zcat -f $L2_text) <(zcat -f $L1_text) <(zcat -f $aln) $fifo $max_plen $dmodel) || exit 1"
|
| 50 |
+
($extractor <(zcat -f $L2_text) <(zcat -f $L1_text) <(zcat -f $aln) $fifo $max_plen $dmodel) || exit 1
|
| 51 |
+
|
| 52 |
+
wait
|
| 53 |
+
|
| 54 |
+
# for part in fwd bwd dst; do
|
| 55 |
+
# echo -n '' > $odir/${part}/sort.batch
|
| 56 |
+
# for f in $odir/${part}/part.[0-9][0-9][0-9][0-9].gz; do
|
| 57 |
+
# g=`echo $f | sed 's/.gz$//'`
|
| 58 |
+
# # echo "f=$g; if [ -e \$f.gz ] ; then zcat \$f.gz | LC_ALL=C sort | gzip > \$f.gz_ && mv \$f.gz_ \$f.sorted.gz && rm \$f.gz; fi" \
|
| 59 |
+
# echo "f=$g; if [ -e \$f.gz ] ; then zcat \$f.gz | LC_ALL=C sort | gzip > \$f.gz_ && mv \$f.gz_ \$f.sorted.gz; fi" \
|
| 60 |
+
# >> $odir/${part}/sort.batch
|
| 61 |
+
# done
|
| 62 |
+
# done
|
| 63 |
+
|
mosesdecoder/contrib/m4m/scripts/moses.make-lex.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
# Quick hack to extract lexica from Giza-Aligned corpus
|
| 4 |
+
# (c) 2011 Ulrich Germann
|
| 5 |
+
|
| 6 |
+
import sys, os
|
| 7 |
+
|
| 8 |
+
D = os.popen("zcat %s" % sys.argv[1])
|
| 9 |
+
E = os.popen("zcat %s" % sys.argv[2])
|
| 10 |
+
A = os.popen("zcat %s" % sys.argv[3])
|
| 11 |
+
d_given_e = sys.argv[4]
|
| 12 |
+
e_given_d = sys.argv[5]
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
os.makedirs(os.path.dirname(d_given_e))
|
| 16 |
+
os.makedirs(os.path.dirname(e_given_d))
|
| 17 |
+
except:
|
| 18 |
+
pass
|
| 19 |
+
|
| 20 |
+
WD = ["NULL","UNK"]
|
| 21 |
+
WE = ["NULL","UNK"]
|
| 22 |
+
VD = {}
|
| 23 |
+
VE = {}
|
| 24 |
+
JJ = []
|
| 25 |
+
MD = []
|
| 26 |
+
ME = []
|
| 27 |
+
|
| 28 |
+
def id(V,W,x):
|
| 29 |
+
i = V.setdefault(x,len(W))
|
| 30 |
+
if i == len(W): W.append(x)
|
| 31 |
+
return i
|
| 32 |
+
|
| 33 |
+
ctr = 0
|
| 34 |
+
for dline in D:
|
| 35 |
+
ctr += 1
|
| 36 |
+
#if ctr % 1000 == 0: sys.stderr.write('.')
|
| 37 |
+
eline = E.readline()
|
| 38 |
+
aline = A.readline()
|
| 39 |
+
d = [id(VD,WD,w) for w in dline.strip().split()]
|
| 40 |
+
e = [id(VE,WE,w) for w in eline.strip().split()]
|
| 41 |
+
a = [[int(y) for y in x.split('-')] for x in aline.split()]
|
| 42 |
+
|
| 43 |
+
while len(MD) <= len(VD) + 2:
|
| 44 |
+
MD.append(0)
|
| 45 |
+
JJ.append({})
|
| 46 |
+
pass
|
| 47 |
+
|
| 48 |
+
while len(ME) <= len(VE) + 2:
|
| 49 |
+
ME.append(0)
|
| 50 |
+
pass
|
| 51 |
+
|
| 52 |
+
fd = [0 for i in xrange(len(d))]
|
| 53 |
+
fe = [0 for i in xrange(len(e))]
|
| 54 |
+
for x,y in a:
|
| 55 |
+
fd[x] += 1
|
| 56 |
+
fe[y] += 1
|
| 57 |
+
MD[d[x]] += 1
|
| 58 |
+
ME[e[y]] += 1
|
| 59 |
+
JJ[d[x]][e[y]] = JJ[d[x]].setdefault(e[y],0) + 1
|
| 60 |
+
# print WD[d[x]],WE[e[y]],JJ[d[x]][e[y]]
|
| 61 |
+
pass
|
| 62 |
+
for i in [d[k] for k in xrange(len(d)) if fd[k] == 0]:
|
| 63 |
+
ME[0] += 1
|
| 64 |
+
MD[i] += 1
|
| 65 |
+
JJ[i][0] = JJ[i].setdefault(0,0) + 1
|
| 66 |
+
pass
|
| 67 |
+
for i in [e[k] for k in xrange(len(e)) if fe[k] == 0]:
|
| 68 |
+
ME[i] += 1
|
| 69 |
+
MD[0] += 1
|
| 70 |
+
JJ[0][i] = JJ[0].setdefault(i,0) + 1
|
| 71 |
+
pass
|
| 72 |
+
pass
|
| 73 |
+
|
| 74 |
+
ED = os.popen("gzip > %s" % e_given_d, 'w')
|
| 75 |
+
DE = os.popen("gzip > %s" % d_given_e, 'w')
|
| 76 |
+
|
| 77 |
+
for d in xrange(len(JJ)):
|
| 78 |
+
T = JJ[d]
|
| 79 |
+
for e,jj in T.items():
|
| 80 |
+
print >>ED, WE[e], WD[d], float(jj)/MD[d]
|
| 81 |
+
print >>DE, WD[d], WE[e], float(jj)/ME[e]
|
| 82 |
+
pass
|
| 83 |
+
pass
|
| 84 |
+
|
| 85 |
+
ED.close()
|
| 86 |
+
DE.close()
|
mosesdecoder/contrib/m4m/scripts/moses.phrase-extract.sh
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Helper script for phrase extraction from a single corpus shard.
|
| 3 |
+
# Written by Ulrich Germann.
|
| 4 |
+
|
| 5 |
+
# to be added: built-in factor filtering for factored models
|
| 6 |
+
|
| 7 |
+
cleanup()
|
| 8 |
+
{
|
| 9 |
+
if [ -e $fifo ] ; then rm $fifo; fi
|
| 10 |
+
if [ -e $fifo.inv ] ; then rm $fifo.inv; fi
|
| 11 |
+
if [ -e $fifo.o ] ; then rm $fifo.o; fi
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
usage()
|
| 15 |
+
{
|
| 16 |
+
echo
|
| 17 |
+
echo "$0: wrapper script to extract phrases from word-aligned corpus"
|
| 18 |
+
echo -e "usage:\n $0 <extractor> <ibase> <L1tag> <L2tag> [-x] "
|
| 19 |
+
echo "options:"
|
| 20 |
+
echo "-l: maximum phrase length ($plen)"
|
| 21 |
+
echo "-m: distortion model specification"
|
| 22 |
+
echo "-o: base name for output files .fwd.gz .bwd.gz [.<dmodel>.dst.gz]"
|
| 23 |
+
echo "-x: (no argument) don't create .fwd.gz and .bwd.gz"
|
| 24 |
+
echo
|
| 25 |
+
echo "required input files: <ibase>.<L1tag>.gz ibase.<L2tag>.gz ibase.<aln>.gz"
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
plen=7
|
| 29 |
+
nottable=
|
| 30 |
+
dmodel=
|
| 31 |
+
dspec=
|
| 32 |
+
pargs=
|
| 33 |
+
sfactors=
|
| 34 |
+
tfactors=
|
| 35 |
+
while [ $# -gt 0 ]; do
|
| 36 |
+
case $1 in
|
| 37 |
+
-l*) plen=${1#-l}
|
| 38 |
+
plen=${plen#=}
|
| 39 |
+
if [ -z $plen ] ; then
|
| 40 |
+
shift
|
| 41 |
+
plen=$1
|
| 42 |
+
fi
|
| 43 |
+
;;
|
| 44 |
+
-m*) dmodel=${1#-m}
|
| 45 |
+
dmodel=${dmodel#=}
|
| 46 |
+
if [ -z $dmodel ] ; then
|
| 47 |
+
shift
|
| 48 |
+
dmodel="$1"
|
| 49 |
+
fi
|
| 50 |
+
;;
|
| 51 |
+
-o*) obase=${1#-o}
|
| 52 |
+
obase=${obase#=}
|
| 53 |
+
if [ -z $obase ] ; then
|
| 54 |
+
shift
|
| 55 |
+
obase=$1
|
| 56 |
+
fi
|
| 57 |
+
;;
|
| 58 |
+
-s*) sfactors=${1#-s}
|
| 59 |
+
sfactors=${sfactors#=}
|
| 60 |
+
if [ -z $sfactors ] ; then
|
| 61 |
+
shift
|
| 62 |
+
sfactors = $1
|
| 63 |
+
fi
|
| 64 |
+
;;
|
| 65 |
+
-t*) tfactors=${1#-t}
|
| 66 |
+
tfactors=${tfactors#=}
|
| 67 |
+
if [ -z $tfactors ] ; then
|
| 68 |
+
shift
|
| 69 |
+
sfactors = $1
|
| 70 |
+
fi
|
| 71 |
+
;;
|
| 72 |
+
-x) nottable=1;;
|
| 73 |
+
-h) usage; exit 0;;
|
| 74 |
+
*) pargs=(${pargs[*]} $1);;
|
| 75 |
+
esac
|
| 76 |
+
shift
|
| 77 |
+
done
|
| 78 |
+
|
| 79 |
+
if [ -n "$sfactors" ] || [ -n "$tfactors" ] ; then
|
| 80 |
+
echo "Factor filtering is not implemented yet!"
|
| 81 |
+
exit 2
|
| 82 |
+
fi
|
| 83 |
+
|
| 84 |
+
extract=${pargs[0]}
|
| 85 |
+
ibase=${pargs[1]}
|
| 86 |
+
L1tag=${pargs[2]}
|
| 87 |
+
L2tag=${pargs[3]}
|
| 88 |
+
obase=${obase:=$ibase}
|
| 89 |
+
|
| 90 |
+
fifo=$obase.$$
|
| 91 |
+
trap 'cleanup' 0
|
| 92 |
+
|
| 93 |
+
export LC_ALL=C
|
| 94 |
+
if [ -z "$nottable" ] ; then
|
| 95 |
+
mkfifo $fifo; sort -S 5G < $fifo | gzip > $obase.fwd.gz &
|
| 96 |
+
mkfifo $fifo.inv; sort -S 5G < $fifo.inv | gzip > $obase.bwd.gz &
|
| 97 |
+
fi
|
| 98 |
+
if [ -n "$dmodel" ] ; then
|
| 99 |
+
mkfifo $fifo.o
|
| 100 |
+
sort -S 5G < $fifo.o | gzip > $obase.dst.gz &
|
| 101 |
+
dspec="orientation --model "
|
| 102 |
+
dspec+=`echo $dmodel | perl -pe 's/((hier|phrase|wbe)-(msd|msrl|mono)).*/$1/;'`
|
| 103 |
+
fi
|
| 104 |
+
|
| 105 |
+
txt1=${ibase}.${L1tag}.gz
|
| 106 |
+
txt2=${ibase}.${L2tag}.gz
|
| 107 |
+
aln=${ibase}.aln.gz
|
| 108 |
+
echo "($extract <(zcat -f $txt1) <(zcat -f $txt2) <(zcat -f $aln) $fifo $plen $dspec) || exit 1"
|
| 109 |
+
($extract <(zcat -f $txt2) <(zcat -f $txt1) <(zcat -f $aln) $fifo $plen $dspec) || exit 1
|
| 110 |
+
wait
|
mosesdecoder/contrib/m4m/scripts/moses.score-phrases.sh
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Wrapper script around the moses phrase scoring utility.
|
| 3 |
+
# Script by Ulrich Germann. Called from within M4M.
|
| 4 |
+
#
|
| 5 |
+
# lexicon given should be
|
| 6 |
+
# de-given-en for fwd
|
| 7 |
+
# en-given-de for bwd
|
| 8 |
+
|
| 9 |
+
binary=$1
|
| 10 |
+
phrases=$2
|
| 11 |
+
lex=$3
|
| 12 |
+
obase=$4
|
| 13 |
+
smoothing=$5
|
| 14 |
+
inv=$6
|
| 15 |
+
|
| 16 |
+
cleanup()
|
| 17 |
+
{
|
| 18 |
+
if [ -e $obase.$$ ] ; then rm $obase.$$; fi
|
| 19 |
+
if [ -e $obase.$$.coc ] ; then mv $obase.$$.coc $obase.coc; fi
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
mkfifo $obase.$$ || exit 1
|
| 23 |
+
|
| 24 |
+
trap 'cleanup' 0
|
| 25 |
+
|
| 26 |
+
export LC_ALL=C
|
| 27 |
+
if [[ "$inv" == "--Inverse" ]] ; then
|
| 28 |
+
parallel --gnu < $obase.$$ -j10 --pipe --blocksize 250M "sort -S 10G | gzip > $obase.{#}.gz" &
|
| 29 |
+
else
|
| 30 |
+
gzip < $obase.$$ > $obase.scored.gz_ &
|
| 31 |
+
fi
|
| 32 |
+
|
| 33 |
+
if [[ $phrases != "-" && $phrases != "/dev/stdin" ]] ; then
|
| 34 |
+
$binary $phrases <(zcat -f $lex) $obase.$$ $smoothing $inv || exit 1
|
| 35 |
+
else
|
| 36 |
+
$binary /dev/stdin <(zcat -f $lex) $obase.$$ $smoothing $inv || exit 1
|
| 37 |
+
fi
|
| 38 |
+
|
| 39 |
+
if [ $? ] ; then exit $?; fi
|
| 40 |
+
wait
|
| 41 |
+
exit $?;
|
mosesdecoder/contrib/m4m/scripts/moses.transfer-weights.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
# Combines the system definition from one .ini file with the weights contained
|
| 4 |
+
# in another. Works for the new moses.ini format with fully named feature
|
| 5 |
+
# functions. Writes the new .ini file to stdout
|
| 6 |
+
# Script by Ulrich Germann.
|
| 7 |
+
|
| 8 |
+
import re,sys,os
|
| 9 |
+
from optparse import OptionParser
|
| 10 |
+
|
| 11 |
+
SectionHeaderPattern = re.compile(r'^\[(.*)\]\s*$')
|
| 12 |
+
def read_ini(filename):
|
| 13 |
+
'''
|
| 14 |
+
Reads a moses.ini file and returns a dictionary mapping
|
| 15 |
+
from section names to a list of lines contained in that section.
|
| 16 |
+
'''
|
| 17 |
+
AllSections = {}
|
| 18 |
+
CurSection = AllSections.setdefault('',[])
|
| 19 |
+
for line in open(filename):
|
| 20 |
+
line = line.strip()
|
| 21 |
+
m = SectionHeaderPattern.match(line)
|
| 22 |
+
if m:
|
| 23 |
+
CurSection = AllSections.setdefault(m.group(1),[])
|
| 24 |
+
elif len(line):
|
| 25 |
+
CurSection.append(line)
|
| 26 |
+
pass
|
| 27 |
+
pass
|
| 28 |
+
return AllSections
|
| 29 |
+
|
| 30 |
+
parser = OptionParser()
|
| 31 |
+
parser.add_option("-s", "--system", dest = "system",
|
| 32 |
+
help = "moses.ini file defining the system")
|
| 33 |
+
parser.add_option("-w", "--weights", dest = "weight",
|
| 34 |
+
help = "moses.ini file defining the system")
|
| 35 |
+
|
| 36 |
+
opts,args = parser.parse_args()
|
| 37 |
+
|
| 38 |
+
system = read_ini(opts.system)
|
| 39 |
+
weight = read_ini(opts.weight)
|
| 40 |
+
|
| 41 |
+
for s in system:
|
| 42 |
+
if len(s) == 0 or s[0:6] == 'weight': continue
|
| 43 |
+
print "[%s]"%s
|
| 44 |
+
print "\n".join(system[s])
|
| 45 |
+
print
|
| 46 |
+
pass
|
| 47 |
+
|
| 48 |
+
if 'weight' in weight:
|
| 49 |
+
print '[weight]'
|
| 50 |
+
print "\n".join(weight['weight'])
|
| 51 |
+
else:
|
| 52 |
+
for s in weight:
|
| 53 |
+
if s[0:6] != 'weight': continue
|
| 54 |
+
print "[%s]"%s
|
| 55 |
+
print "\n".join(system[s])
|
| 56 |
+
print
|
| 57 |
+
pass
|
| 58 |
+
pass
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
mosesdecoder/contrib/m4m/util/Jamfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
external-lib bzip2 ;
|
| 2 |
+
external-lib zlib ;
|
| 3 |
+
|
| 4 |
+
exe merge-sorted :
|
| 5 |
+
merge-sorted.cc
|
| 6 |
+
$(TOP)/moses/TranslationModel/UG/mm//mm
|
| 7 |
+
$(TOP)/moses/TranslationModel/UG/generic//generic
|
| 8 |
+
$(TOP)//boost_iostreams
|
| 9 |
+
$(TOP)//boost_program_options
|
| 10 |
+
;
|
| 11 |
+
|
| 12 |
+
|
mosesdecoder/contrib/memscore/Makefile.in
ADDED
|
@@ -0,0 +1,581 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Makefile.in generated by automake 1.9.6 from Makefile.am.
|
| 2 |
+
# @configure_input@
|
| 3 |
+
|
| 4 |
+
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
| 5 |
+
# 2003, 2004, 2005 Free Software Foundation, Inc.
|
| 6 |
+
# This Makefile.in is free software; the Free Software Foundation
|
| 7 |
+
# gives unlimited permission to copy and/or distribute it,
|
| 8 |
+
# with or without modifications, as long as this notice is preserved.
|
| 9 |
+
|
| 10 |
+
# This program is distributed in the hope that it will be useful,
|
| 11 |
+
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
| 12 |
+
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
| 13 |
+
# PARTICULAR PURPOSE.
|
| 14 |
+
|
| 15 |
+
@SET_MAKE@
|
| 16 |
+
|
| 17 |
+
# memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 18 |
+
# Christian Hardmeier, FBK-irst, Trento, 2010
|
| 19 |
+
# $Id$
|
| 20 |
+
|
| 21 |
+
srcdir = @srcdir@
|
| 22 |
+
top_srcdir = @top_srcdir@
|
| 23 |
+
VPATH = @srcdir@
|
| 24 |
+
pkgdatadir = $(datadir)/@PACKAGE@
|
| 25 |
+
pkglibdir = $(libdir)/@PACKAGE@
|
| 26 |
+
pkgincludedir = $(includedir)/@PACKAGE@
|
| 27 |
+
top_builddir = .
|
| 28 |
+
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
| 29 |
+
INSTALL = @INSTALL@
|
| 30 |
+
install_sh_DATA = $(install_sh) -c -m 644
|
| 31 |
+
install_sh_PROGRAM = $(install_sh) -c
|
| 32 |
+
install_sh_SCRIPT = $(install_sh) -c
|
| 33 |
+
INSTALL_HEADER = $(INSTALL_DATA)
|
| 34 |
+
transform = $(program_transform_name)
|
| 35 |
+
NORMAL_INSTALL = :
|
| 36 |
+
PRE_INSTALL = :
|
| 37 |
+
POST_INSTALL = :
|
| 38 |
+
NORMAL_UNINSTALL = :
|
| 39 |
+
PRE_UNINSTALL = :
|
| 40 |
+
POST_UNINSTALL = :
|
| 41 |
+
bin_PROGRAMS = memscore$(EXEEXT)
|
| 42 |
+
@IRSTLM_TRUE@am__append_1 = phraselm.cpp phraselm.h
|
| 43 |
+
@CHANNEL_SCORER_TRUE@am__append_2 = channel-scorer.cpp channel-scorer.h
|
| 44 |
+
subdir = .
|
| 45 |
+
DIST_COMMON = $(am__configure_deps) $(srcdir)/Makefile.am \
|
| 46 |
+
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
|
| 47 |
+
$(top_srcdir)/configure depcomp install-sh missing
|
| 48 |
+
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
| 49 |
+
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_boost_base.m4 \
|
| 50 |
+
$(top_srcdir)/configure.ac
|
| 51 |
+
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
| 52 |
+
$(ACLOCAL_M4)
|
| 53 |
+
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
| 54 |
+
configure.lineno configure.status.lineno
|
| 55 |
+
mkinstalldirs = $(install_sh) -d
|
| 56 |
+
CONFIG_HEADER = config.h
|
| 57 |
+
CONFIG_CLEAN_FILES =
|
| 58 |
+
am__installdirs = "$(DESTDIR)$(bindir)"
|
| 59 |
+
binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
|
| 60 |
+
PROGRAMS = $(bin_PROGRAMS)
|
| 61 |
+
am__memscore_SOURCES_DIST = datastorage.h memscore.h phrasetable.h \
|
| 62 |
+
scorer.h scorer-impl.h statistic.h timestamp.h phrasetable.cpp \
|
| 63 |
+
memscore.cpp scorer.cpp lexdecom.cpp lexdecom.h phraselm.cpp \
|
| 64 |
+
phraselm.h channel-scorer.cpp channel-scorer.h
|
| 65 |
+
@IRSTLM_TRUE@am__objects_1 = phraselm.$(OBJEXT)
|
| 66 |
+
@CHANNEL_SCORER_TRUE@am__objects_2 = channel-scorer.$(OBJEXT)
|
| 67 |
+
am_memscore_OBJECTS = phrasetable.$(OBJEXT) memscore.$(OBJEXT) \
|
| 68 |
+
scorer.$(OBJEXT) lexdecom.$(OBJEXT) $(am__objects_1) \
|
| 69 |
+
$(am__objects_2)
|
| 70 |
+
memscore_OBJECTS = $(am_memscore_OBJECTS)
|
| 71 |
+
memscore_DEPENDENCIES =
|
| 72 |
+
DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
|
| 73 |
+
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
| 74 |
+
am__depfiles_maybe = depfiles
|
| 75 |
+
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
| 76 |
+
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
|
| 77 |
+
CXXLD = $(CXX)
|
| 78 |
+
CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
|
| 79 |
+
-o $@
|
| 80 |
+
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
| 81 |
+
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
| 82 |
+
CCLD = $(CC)
|
| 83 |
+
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
|
| 84 |
+
SOURCES = $(memscore_SOURCES)
|
| 85 |
+
DIST_SOURCES = $(am__memscore_SOURCES_DIST)
|
| 86 |
+
ETAGS = etags
|
| 87 |
+
CTAGS = ctags
|
| 88 |
+
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
| 89 |
+
distdir = $(PACKAGE)-$(VERSION)
|
| 90 |
+
top_distdir = $(distdir)
|
| 91 |
+
am__remove_distdir = \
|
| 92 |
+
{ test ! -d $(distdir) \
|
| 93 |
+
|| { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
|
| 94 |
+
&& rm -fr $(distdir); }; }
|
| 95 |
+
DIST_ARCHIVES = $(distdir).tar.gz
|
| 96 |
+
GZIP_ENV = --best
|
| 97 |
+
distuninstallcheck_listfiles = find . -type f -print
|
| 98 |
+
distcleancheck_listfiles = find . -type f -print
|
| 99 |
+
ACLOCAL = @ACLOCAL@
|
| 100 |
+
AMDEP_FALSE = @AMDEP_FALSE@
|
| 101 |
+
AMDEP_TRUE = @AMDEP_TRUE@
|
| 102 |
+
AMTAR = @AMTAR@
|
| 103 |
+
AUTOCONF = @AUTOCONF@
|
| 104 |
+
AUTOHEADER = @AUTOHEADER@
|
| 105 |
+
AUTOMAKE = @AUTOMAKE@
|
| 106 |
+
AWK = @AWK@
|
| 107 |
+
BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
|
| 108 |
+
BOOST_LDFLAGS = @BOOST_LDFLAGS@
|
| 109 |
+
CC = @CC@
|
| 110 |
+
CCDEPMODE = @CCDEPMODE@
|
| 111 |
+
CFLAGS = @CFLAGS@
|
| 112 |
+
CHANNEL_SCORER_FALSE = @CHANNEL_SCORER_FALSE@
|
| 113 |
+
CHANNEL_SCORER_TRUE = @CHANNEL_SCORER_TRUE@
|
| 114 |
+
CPPFLAGS = @CPPFLAGS@
|
| 115 |
+
CXX = @CXX@
|
| 116 |
+
CXXCPP = @CXXCPP@
|
| 117 |
+
CXXDEPMODE = @CXXDEPMODE@
|
| 118 |
+
CXXFLAGS = @CXXFLAGS@
|
| 119 |
+
CYGPATH_W = @CYGPATH_W@
|
| 120 |
+
DEFS = @DEFS@
|
| 121 |
+
DEPDIR = @DEPDIR@
|
| 122 |
+
ECHO_C = @ECHO_C@
|
| 123 |
+
ECHO_N = @ECHO_N@
|
| 124 |
+
ECHO_T = @ECHO_T@
|
| 125 |
+
EGREP = @EGREP@
|
| 126 |
+
EXEEXT = @EXEEXT@
|
| 127 |
+
GREP = @GREP@
|
| 128 |
+
INSTALL_DATA = @INSTALL_DATA@
|
| 129 |
+
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
| 130 |
+
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
| 131 |
+
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
| 132 |
+
IRSTLM_FALSE = @IRSTLM_FALSE@
|
| 133 |
+
IRSTLM_TRUE = @IRSTLM_TRUE@
|
| 134 |
+
LDFLAGS = @LDFLAGS@
|
| 135 |
+
LIBOBJS = @LIBOBJS@
|
| 136 |
+
LIBS = @LIBS@
|
| 137 |
+
LTLIBOBJS = @LTLIBOBJS@
|
| 138 |
+
MAKEINFO = @MAKEINFO@
|
| 139 |
+
OBJEXT = @OBJEXT@
|
| 140 |
+
PACKAGE = @PACKAGE@
|
| 141 |
+
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
| 142 |
+
PACKAGE_NAME = @PACKAGE_NAME@
|
| 143 |
+
PACKAGE_STRING = @PACKAGE_STRING@
|
| 144 |
+
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
| 145 |
+
PACKAGE_VERSION = @PACKAGE_VERSION@
|
| 146 |
+
PATH_SEPARATOR = @PATH_SEPARATOR@
|
| 147 |
+
SET_MAKE = @SET_MAKE@
|
| 148 |
+
SHELL = @SHELL@
|
| 149 |
+
STRIP = @STRIP@
|
| 150 |
+
VERSION = @VERSION@
|
| 151 |
+
ac_ct_CC = @ac_ct_CC@
|
| 152 |
+
ac_ct_CXX = @ac_ct_CXX@
|
| 153 |
+
am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
|
| 154 |
+
am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
|
| 155 |
+
am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
|
| 156 |
+
am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
|
| 157 |
+
am__include = @am__include@
|
| 158 |
+
am__leading_dot = @am__leading_dot@
|
| 159 |
+
am__quote = @am__quote@
|
| 160 |
+
am__tar = @am__tar@
|
| 161 |
+
am__untar = @am__untar@
|
| 162 |
+
bindir = @bindir@
|
| 163 |
+
build_alias = @build_alias@
|
| 164 |
+
datadir = @datadir@
|
| 165 |
+
datarootdir = @datarootdir@
|
| 166 |
+
docdir = @docdir@
|
| 167 |
+
dvidir = @dvidir@
|
| 168 |
+
exec_prefix = @exec_prefix@
|
| 169 |
+
host_alias = @host_alias@
|
| 170 |
+
htmldir = @htmldir@
|
| 171 |
+
includedir = @includedir@
|
| 172 |
+
infodir = @infodir@
|
| 173 |
+
install_sh = @install_sh@
|
| 174 |
+
libdir = @libdir@
|
| 175 |
+
libexecdir = @libexecdir@
|
| 176 |
+
localedir = @localedir@
|
| 177 |
+
localstatedir = @localstatedir@
|
| 178 |
+
mandir = @mandir@
|
| 179 |
+
mkdir_p = @mkdir_p@
|
| 180 |
+
oldincludedir = @oldincludedir@
|
| 181 |
+
pdfdir = @pdfdir@
|
| 182 |
+
prefix = @prefix@
|
| 183 |
+
program_transform_name = @program_transform_name@
|
| 184 |
+
psdir = @psdir@
|
| 185 |
+
sbindir = @sbindir@
|
| 186 |
+
sharedstatedir = @sharedstatedir@
|
| 187 |
+
sysconfdir = @sysconfdir@
|
| 188 |
+
target_alias = @target_alias@
|
| 189 |
+
ACLOCAL_AMFLAGS = -I m4
|
| 190 |
+
AUTOMAKE_OPTIONS = foreign
|
| 191 |
+
AM_CXXFLAGS = $(BOOST_CPPFLAGS) -Wall -ffast-math -ftrapping-math -fomit-frame-pointer
|
| 192 |
+
memscore_SOURCES = datastorage.h memscore.h phrasetable.h scorer.h \
|
| 193 |
+
scorer-impl.h statistic.h timestamp.h phrasetable.cpp \
|
| 194 |
+
memscore.cpp scorer.cpp lexdecom.cpp lexdecom.h \
|
| 195 |
+
$(am__append_1) $(am__append_2)
|
| 196 |
+
memscore_LDADD = $(IRSTLM_LIBS) $(GSL_LIBS)
|
| 197 |
+
all: config.h
|
| 198 |
+
$(MAKE) $(AM_MAKEFLAGS) all-am
|
| 199 |
+
|
| 200 |
+
.SUFFIXES:
|
| 201 |
+
.SUFFIXES: .cpp .o .obj
|
| 202 |
+
am--refresh:
|
| 203 |
+
@:
|
| 204 |
+
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
| 205 |
+
@for dep in $?; do \
|
| 206 |
+
case '$(am__configure_deps)' in \
|
| 207 |
+
*$$dep*) \
|
| 208 |
+
echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \
|
| 209 |
+
cd $(srcdir) && $(AUTOMAKE) --foreign \
|
| 210 |
+
&& exit 0; \
|
| 211 |
+
exit 1;; \
|
| 212 |
+
esac; \
|
| 213 |
+
done; \
|
| 214 |
+
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
|
| 215 |
+
cd $(top_srcdir) && \
|
| 216 |
+
$(AUTOMAKE) --foreign Makefile
|
| 217 |
+
.PRECIOUS: Makefile
|
| 218 |
+
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
| 219 |
+
@case '$?' in \
|
| 220 |
+
*config.status*) \
|
| 221 |
+
echo ' $(SHELL) ./config.status'; \
|
| 222 |
+
$(SHELL) ./config.status;; \
|
| 223 |
+
*) \
|
| 224 |
+
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
|
| 225 |
+
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
|
| 226 |
+
esac;
|
| 227 |
+
|
| 228 |
+
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
| 229 |
+
$(SHELL) ./config.status --recheck
|
| 230 |
+
|
| 231 |
+
$(top_srcdir)/configure: $(am__configure_deps)
|
| 232 |
+
cd $(srcdir) && $(AUTOCONF)
|
| 233 |
+
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
| 234 |
+
cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
|
| 235 |
+
|
| 236 |
+
config.h: stamp-h1
|
| 237 |
+
@if test ! -f $@; then \
|
| 238 |
+
rm -f stamp-h1; \
|
| 239 |
+
$(MAKE) stamp-h1; \
|
| 240 |
+
else :; fi
|
| 241 |
+
|
| 242 |
+
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
|
| 243 |
+
@rm -f stamp-h1
|
| 244 |
+
cd $(top_builddir) && $(SHELL) ./config.status config.h
|
| 245 |
+
$(srcdir)/config.h.in: $(am__configure_deps)
|
| 246 |
+
cd $(top_srcdir) && $(AUTOHEADER)
|
| 247 |
+
rm -f stamp-h1
|
| 248 |
+
touch $@
|
| 249 |
+
|
| 250 |
+
distclean-hdr:
|
| 251 |
+
-rm -f config.h stamp-h1
|
| 252 |
+
install-binPROGRAMS: $(bin_PROGRAMS)
|
| 253 |
+
@$(NORMAL_INSTALL)
|
| 254 |
+
test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)"
|
| 255 |
+
@list='$(bin_PROGRAMS)'; for p in $$list; do \
|
| 256 |
+
p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
|
| 257 |
+
if test -f $$p \
|
| 258 |
+
; then \
|
| 259 |
+
f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
|
| 260 |
+
echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
|
| 261 |
+
$(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
|
| 262 |
+
else :; fi; \
|
| 263 |
+
done
|
| 264 |
+
|
| 265 |
+
uninstall-binPROGRAMS:
|
| 266 |
+
@$(NORMAL_UNINSTALL)
|
| 267 |
+
@list='$(bin_PROGRAMS)'; for p in $$list; do \
|
| 268 |
+
f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
|
| 269 |
+
echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
|
| 270 |
+
rm -f "$(DESTDIR)$(bindir)/$$f"; \
|
| 271 |
+
done
|
| 272 |
+
|
| 273 |
+
clean-binPROGRAMS:
|
| 274 |
+
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
|
| 275 |
+
memscore$(EXEEXT): $(memscore_OBJECTS) $(memscore_DEPENDENCIES)
|
| 276 |
+
@rm -f memscore$(EXEEXT)
|
| 277 |
+
$(CXXLINK) $(memscore_LDFLAGS) $(memscore_OBJECTS) $(memscore_LDADD) $(LIBS)
|
| 278 |
+
|
| 279 |
+
mostlyclean-compile:
|
| 280 |
+
-rm -f *.$(OBJEXT)
|
| 281 |
+
|
| 282 |
+
distclean-compile:
|
| 283 |
+
-rm -f *.tab.c
|
| 284 |
+
|
| 285 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/channel-scorer.Po@am__quote@
|
| 286 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lexdecom.Po@am__quote@
|
| 287 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memscore.Po@am__quote@
|
| 288 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phraselm.Po@am__quote@
|
| 289 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phrasetable.Po@am__quote@
|
| 290 |
+
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scorer.Po@am__quote@
|
| 291 |
+
|
| 292 |
+
.cpp.o:
|
| 293 |
+
@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
|
| 294 |
+
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
|
| 295 |
+
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
| 296 |
+
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
| 297 |
+
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
|
| 298 |
+
|
| 299 |
+
.cpp.obj:
|
| 300 |
+
@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
|
| 301 |
+
@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
|
| 302 |
+
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
| 303 |
+
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
| 304 |
+
@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
| 305 |
+
uninstall-info-am:
|
| 306 |
+
|
| 307 |
+
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
| 308 |
+
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
| 309 |
+
unique=`for i in $$list; do \
|
| 310 |
+
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
| 311 |
+
done | \
|
| 312 |
+
$(AWK) ' { files[$$0] = 1; } \
|
| 313 |
+
END { for (i in files) print i; }'`; \
|
| 314 |
+
mkid -fID $$unique
|
| 315 |
+
tags: TAGS
|
| 316 |
+
|
| 317 |
+
TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
| 318 |
+
$(TAGS_FILES) $(LISP)
|
| 319 |
+
tags=; \
|
| 320 |
+
here=`pwd`; \
|
| 321 |
+
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
| 322 |
+
unique=`for i in $$list; do \
|
| 323 |
+
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
| 324 |
+
done | \
|
| 325 |
+
$(AWK) ' { files[$$0] = 1; } \
|
| 326 |
+
END { for (i in files) print i; }'`; \
|
| 327 |
+
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
|
| 328 |
+
test -n "$$unique" || unique=$$empty_fix; \
|
| 329 |
+
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
| 330 |
+
$$tags $$unique; \
|
| 331 |
+
fi
|
| 332 |
+
ctags: CTAGS
|
| 333 |
+
CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
| 334 |
+
$(TAGS_FILES) $(LISP)
|
| 335 |
+
tags=; \
|
| 336 |
+
here=`pwd`; \
|
| 337 |
+
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
| 338 |
+
unique=`for i in $$list; do \
|
| 339 |
+
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
| 340 |
+
done | \
|
| 341 |
+
$(AWK) ' { files[$$0] = 1; } \
|
| 342 |
+
END { for (i in files) print i; }'`; \
|
| 343 |
+
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|
| 344 |
+
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
| 345 |
+
$$tags $$unique
|
| 346 |
+
|
| 347 |
+
GTAGS:
|
| 348 |
+
here=`$(am__cd) $(top_builddir) && pwd` \
|
| 349 |
+
&& cd $(top_srcdir) \
|
| 350 |
+
&& gtags -i $(GTAGS_ARGS) $$here
|
| 351 |
+
|
| 352 |
+
distclean-tags:
|
| 353 |
+
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
| 354 |
+
|
| 355 |
+
distdir: $(DISTFILES)
|
| 356 |
+
$(am__remove_distdir)
|
| 357 |
+
mkdir $(distdir)
|
| 358 |
+
$(mkdir_p) $(distdir)/m4
|
| 359 |
+
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
|
| 360 |
+
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
|
| 361 |
+
list='$(DISTFILES)'; for file in $$list; do \
|
| 362 |
+
case $$file in \
|
| 363 |
+
$(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
|
| 364 |
+
$(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
|
| 365 |
+
esac; \
|
| 366 |
+
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
| 367 |
+
dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
|
| 368 |
+
if test "$$dir" != "$$file" && test "$$dir" != "."; then \
|
| 369 |
+
dir="/$$dir"; \
|
| 370 |
+
$(mkdir_p) "$(distdir)$$dir"; \
|
| 371 |
+
else \
|
| 372 |
+
dir=''; \
|
| 373 |
+
fi; \
|
| 374 |
+
if test -d $$d/$$file; then \
|
| 375 |
+
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
| 376 |
+
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
|
| 377 |
+
fi; \
|
| 378 |
+
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
|
| 379 |
+
else \
|
| 380 |
+
test -f $(distdir)/$$file \
|
| 381 |
+
|| cp -p $$d/$$file $(distdir)/$$file \
|
| 382 |
+
|| exit 1; \
|
| 383 |
+
fi; \
|
| 384 |
+
done
|
| 385 |
+
-find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
|
| 386 |
+
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
|
| 387 |
+
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
|
| 388 |
+
! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
|
| 389 |
+
|| chmod -R a+r $(distdir)
|
| 390 |
+
dist-gzip: distdir
|
| 391 |
+
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
| 392 |
+
$(am__remove_distdir)
|
| 393 |
+
|
| 394 |
+
dist-bzip2: distdir
|
| 395 |
+
tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
|
| 396 |
+
$(am__remove_distdir)
|
| 397 |
+
|
| 398 |
+
dist-tarZ: distdir
|
| 399 |
+
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
| 400 |
+
$(am__remove_distdir)
|
| 401 |
+
|
| 402 |
+
dist-shar: distdir
|
| 403 |
+
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
| 404 |
+
$(am__remove_distdir)
|
| 405 |
+
|
| 406 |
+
dist-zip: distdir
|
| 407 |
+
-rm -f $(distdir).zip
|
| 408 |
+
zip -rq $(distdir).zip $(distdir)
|
| 409 |
+
$(am__remove_distdir)
|
| 410 |
+
|
| 411 |
+
dist dist-all: distdir
|
| 412 |
+
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
| 413 |
+
$(am__remove_distdir)
|
| 414 |
+
|
| 415 |
+
# This target untars the dist file and tries a VPATH configuration. Then
|
| 416 |
+
# it guarantees that the distribution is self-contained by making another
|
| 417 |
+
# tarfile.
|
| 418 |
+
distcheck: dist
|
| 419 |
+
case '$(DIST_ARCHIVES)' in \
|
| 420 |
+
*.tar.gz*) \
|
| 421 |
+
GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
|
| 422 |
+
*.tar.bz2*) \
|
| 423 |
+
bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
|
| 424 |
+
*.tar.Z*) \
|
| 425 |
+
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
|
| 426 |
+
*.shar.gz*) \
|
| 427 |
+
GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
|
| 428 |
+
*.zip*) \
|
| 429 |
+
unzip $(distdir).zip ;;\
|
| 430 |
+
esac
|
| 431 |
+
chmod -R a-w $(distdir); chmod a+w $(distdir)
|
| 432 |
+
mkdir $(distdir)/_build
|
| 433 |
+
mkdir $(distdir)/_inst
|
| 434 |
+
chmod a-w $(distdir)
|
| 435 |
+
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
| 436 |
+
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
| 437 |
+
&& cd $(distdir)/_build \
|
| 438 |
+
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
|
| 439 |
+
$(DISTCHECK_CONFIGURE_FLAGS) \
|
| 440 |
+
&& $(MAKE) $(AM_MAKEFLAGS) \
|
| 441 |
+
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
| 442 |
+
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
| 443 |
+
&& $(MAKE) $(AM_MAKEFLAGS) install \
|
| 444 |
+
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
|
| 445 |
+
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
|
| 446 |
+
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
|
| 447 |
+
distuninstallcheck \
|
| 448 |
+
&& chmod -R a-w "$$dc_install_base" \
|
| 449 |
+
&& ({ \
|
| 450 |
+
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
|
| 451 |
+
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
|
| 452 |
+
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
|
| 453 |
+
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
|
| 454 |
+
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
|
| 455 |
+
} || { rm -rf "$$dc_destdir"; exit 1; }) \
|
| 456 |
+
&& rm -rf "$$dc_destdir" \
|
| 457 |
+
&& $(MAKE) $(AM_MAKEFLAGS) dist \
|
| 458 |
+
&& rm -rf $(DIST_ARCHIVES) \
|
| 459 |
+
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck
|
| 460 |
+
$(am__remove_distdir)
|
| 461 |
+
@(echo "$(distdir) archives ready for distribution: "; \
|
| 462 |
+
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
|
| 463 |
+
sed -e '1{h;s/./=/g;p;x;}' -e '$${p;x;}'
|
| 464 |
+
distuninstallcheck:
|
| 465 |
+
@cd $(distuninstallcheck_dir) \
|
| 466 |
+
&& test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
|
| 467 |
+
|| { echo "ERROR: files left after uninstall:" ; \
|
| 468 |
+
if test -n "$(DESTDIR)"; then \
|
| 469 |
+
echo " (check DESTDIR support)"; \
|
| 470 |
+
fi ; \
|
| 471 |
+
$(distuninstallcheck_listfiles) ; \
|
| 472 |
+
exit 1; } >&2
|
| 473 |
+
distcleancheck: distclean
|
| 474 |
+
@if test '$(srcdir)' = . ; then \
|
| 475 |
+
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
|
| 476 |
+
exit 1 ; \
|
| 477 |
+
fi
|
| 478 |
+
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|
| 479 |
+
|| { echo "ERROR: files left in build directory after distclean:" ; \
|
| 480 |
+
$(distcleancheck_listfiles) ; \
|
| 481 |
+
exit 1; } >&2
|
| 482 |
+
check-am: all-am
|
| 483 |
+
check: check-am
|
| 484 |
+
all-am: Makefile $(PROGRAMS) config.h
|
| 485 |
+
installdirs:
|
| 486 |
+
for dir in "$(DESTDIR)$(bindir)"; do \
|
| 487 |
+
test -z "$$dir" || $(mkdir_p) "$$dir"; \
|
| 488 |
+
done
|
| 489 |
+
install: install-am
|
| 490 |
+
install-exec: install-exec-am
|
| 491 |
+
install-data: install-data-am
|
| 492 |
+
uninstall: uninstall-am
|
| 493 |
+
|
| 494 |
+
install-am: all-am
|
| 495 |
+
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
| 496 |
+
|
| 497 |
+
installcheck: installcheck-am
|
| 498 |
+
install-strip:
|
| 499 |
+
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
| 500 |
+
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
| 501 |
+
`test -z '$(STRIP)' || \
|
| 502 |
+
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
|
| 503 |
+
mostlyclean-generic:
|
| 504 |
+
|
| 505 |
+
clean-generic:
|
| 506 |
+
|
| 507 |
+
distclean-generic:
|
| 508 |
+
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
| 509 |
+
|
| 510 |
+
maintainer-clean-generic:
|
| 511 |
+
@echo "This command is intended for maintainers to use"
|
| 512 |
+
@echo "it deletes files that may require special tools to rebuild."
|
| 513 |
+
clean: clean-am
|
| 514 |
+
|
| 515 |
+
clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
|
| 516 |
+
|
| 517 |
+
distclean: distclean-am
|
| 518 |
+
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
| 519 |
+
-rm -rf ./$(DEPDIR)
|
| 520 |
+
-rm -f Makefile
|
| 521 |
+
distclean-am: clean-am distclean-compile distclean-generic \
|
| 522 |
+
distclean-hdr distclean-tags
|
| 523 |
+
|
| 524 |
+
dvi: dvi-am
|
| 525 |
+
|
| 526 |
+
dvi-am:
|
| 527 |
+
|
| 528 |
+
html: html-am
|
| 529 |
+
|
| 530 |
+
info: info-am
|
| 531 |
+
|
| 532 |
+
info-am:
|
| 533 |
+
|
| 534 |
+
install-data-am:
|
| 535 |
+
|
| 536 |
+
install-exec-am: install-binPROGRAMS
|
| 537 |
+
|
| 538 |
+
install-info: install-info-am
|
| 539 |
+
|
| 540 |
+
install-man:
|
| 541 |
+
|
| 542 |
+
installcheck-am:
|
| 543 |
+
|
| 544 |
+
maintainer-clean: maintainer-clean-am
|
| 545 |
+
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
| 546 |
+
-rm -rf $(top_srcdir)/autom4te.cache
|
| 547 |
+
-rm -rf ./$(DEPDIR)
|
| 548 |
+
-rm -f Makefile
|
| 549 |
+
maintainer-clean-am: distclean-am maintainer-clean-generic
|
| 550 |
+
|
| 551 |
+
mostlyclean: mostlyclean-am
|
| 552 |
+
|
| 553 |
+
mostlyclean-am: mostlyclean-compile mostlyclean-generic
|
| 554 |
+
|
| 555 |
+
pdf: pdf-am
|
| 556 |
+
|
| 557 |
+
pdf-am:
|
| 558 |
+
|
| 559 |
+
ps: ps-am
|
| 560 |
+
|
| 561 |
+
ps-am:
|
| 562 |
+
|
| 563 |
+
uninstall-am: uninstall-binPROGRAMS uninstall-info-am
|
| 564 |
+
|
| 565 |
+
.PHONY: CTAGS GTAGS all all-am am--refresh check check-am clean \
|
| 566 |
+
clean-binPROGRAMS clean-generic ctags dist dist-all dist-bzip2 \
|
| 567 |
+
dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \
|
| 568 |
+
distclean-compile distclean-generic distclean-hdr \
|
| 569 |
+
distclean-tags distcleancheck distdir distuninstallcheck dvi \
|
| 570 |
+
dvi-am html html-am info info-am install install-am \
|
| 571 |
+
install-binPROGRAMS install-data install-data-am install-exec \
|
| 572 |
+
install-exec-am install-info install-info-am install-man \
|
| 573 |
+
install-strip installcheck installcheck-am installdirs \
|
| 574 |
+
maintainer-clean maintainer-clean-generic mostlyclean \
|
| 575 |
+
mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
|
| 576 |
+
tags uninstall uninstall-am uninstall-binPROGRAMS \
|
| 577 |
+
uninstall-info-am
|
| 578 |
+
|
| 579 |
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
| 580 |
+
# Otherwise a system limit (for SysV at least) may be exceeded.
|
| 581 |
+
.NOEXPORT:
|
mosesdecoder/contrib/memscore/configure.ac
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
# Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
# $Id$
|
| 4 |
+
|
| 5 |
+
# Process this file with autoconf to produce a configure script.
|
| 6 |
+
|
| 7 |
+
AC_INIT([memscore], [1.0], [hardmeier at fbk.eu])
|
| 8 |
+
AM_INIT_AUTOMAKE
|
| 9 |
+
AC_LANG([C++])
|
| 10 |
+
|
| 11 |
+
AC_ARG_WITH(irstlm,
|
| 12 |
+
[AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to the IRSTLM toolkit])],
|
| 13 |
+
[with_irstlm=$withval],
|
| 14 |
+
[with_irstlm=check])
|
| 15 |
+
|
| 16 |
+
AC_ARG_WITH([gsl],
|
| 17 |
+
[AC_HELP_STRING([--with-gsl=PATH], [path to the GSL library])],
|
| 18 |
+
[with_gsl=$withval
|
| 19 |
+
CPPFLAGS="$CPPFLAGS -I$with_gsl/include"
|
| 20 |
+
LDFLAGS="$LDFLAGS -L$with_gsl/lib"],
|
| 21 |
+
[with_gsl=check])
|
| 22 |
+
|
| 23 |
+
AC_ARG_ENABLE([channel],
|
| 24 |
+
[AC_HELP_STRING([--enable-channel], [feature not yet publicly available])],
|
| 25 |
+
[AC_DEFINE(ENABLE_CHANNEL_SCORER, [], [Define to enable channel scorer])],
|
| 26 |
+
[enable_channel=no])
|
| 27 |
+
|
| 28 |
+
AC_PREREQ([2.63])
|
| 29 |
+
AC_CONFIG_SRCDIR([memscore.cpp])
|
| 30 |
+
AC_CONFIG_HEADERS([config.h])
|
| 31 |
+
|
| 32 |
+
# Checks for programs.
|
| 33 |
+
AC_PROG_CXX
|
| 34 |
+
AC_PROG_CC
|
| 35 |
+
|
| 36 |
+
# Checks for libraries.
|
| 37 |
+
AX_BOOST_BASE([1.35.0])
|
| 38 |
+
|
| 39 |
+
AC_CHECK_LIB([m], [cos])
|
| 40 |
+
AC_CHECK_LIB([z], [gzopen])
|
| 41 |
+
|
| 42 |
+
have_gsl=yes
|
| 43 |
+
AC_CHECK_LIB([gslcblas],[cblas_dgemm], [], [have_gsl=no])
|
| 44 |
+
AC_CHECK_LIB([gsl],[gsl_blas_dgemm], [], [have_gsl=no])
|
| 45 |
+
|
| 46 |
+
AS_IF([test x$with_irstlm = xcheck],
|
| 47 |
+
[AC_CHECK_HEADER([n_gram.h],
|
| 48 |
+
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
|
| 49 |
+
[with_irstlm=no])]
|
| 50 |
+
,
|
| 51 |
+
[SAVE_CPPFLAGS="$CPPFLAGS"
|
| 52 |
+
CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
|
| 53 |
+
|
| 54 |
+
AC_CHECK_HEADER(n_gram.h,
|
| 55 |
+
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
|
| 56 |
+
[AC_MSG_ERROR([Cannot find IRSTLM!])])
|
| 57 |
+
|
| 58 |
+
MY_ARCH=`uname -m`
|
| 59 |
+
LIB_IRSTLM="-lirstlm"
|
| 60 |
+
LDFLAGS="$LDFLAGS -L${with_irstlm}/lib/${MY_ARCH}"
|
| 61 |
+
LIBS="$LIBS $LIB_IRSTLM"
|
| 62 |
+
FMTLIBS="$FMTLIBS libirstlm.a"]
|
| 63 |
+
)
|
| 64 |
+
AM_CONDITIONAL([IRSTLM], [test x$with_irstlm != xno])
|
| 65 |
+
|
| 66 |
+
AS_IF([test x$enable_channel = xyes],
|
| 67 |
+
[AS_IF([test x$with_irstlm = xno || test x$have_gsl = xno],
|
| 68 |
+
[AC_MSG_ERROR([The channel scorer needs both GSL and irstlm.])])])
|
| 69 |
+
|
| 70 |
+
# Checks for header files.
|
| 71 |
+
#AC_CHECK_HEADERS([fenv.h sys/time.h])
|
| 72 |
+
|
| 73 |
+
# Checks for typedefs, structures, and compiler characteristics.
|
| 74 |
+
AC_TYPE_SIZE_T
|
| 75 |
+
AC_CHECK_TYPES([ptrdiff_t])
|
| 76 |
+
|
| 77 |
+
# Checks for library functions.
|
| 78 |
+
#AC_FUNC_MALLOC
|
| 79 |
+
#AC_CHECK_FUNCS([getpagesize gettimeofday])
|
| 80 |
+
|
| 81 |
+
AM_CONDITIONAL(CHANNEL_SCORER, test x$enable_channel = xyes)
|
| 82 |
+
|
| 83 |
+
AC_CONFIG_FILES([Makefile])
|
| 84 |
+
AC_OUTPUT
|
mosesdecoder/contrib/memscore/lexdecom.h
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* File: lexdecom.h
|
| 3 |
+
* Author: Felipe Sánchez-Martínez, Universitat d'Alacant <fsanchez@dlsi.ua.es>
|
| 4 |
+
*
|
| 5 |
+
* Created on 2010/01/27
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#ifndef _LEXDECOM_H
|
| 9 |
+
#define _LEXDECOM_H
|
| 10 |
+
|
| 11 |
+
#include "phrasetable.h"
|
| 12 |
+
#include "scorer.h"
|
| 13 |
+
|
| 14 |
+
class LexicalDecompositionPhraseScorer : public PhraseScorer
|
| 15 |
+
{
|
| 16 |
+
private:
|
| 17 |
+
explicit LexicalDecompositionPhraseScorer(PhraseTable &pd, bool reverse, const String &lwfile,
|
| 18 |
+
const char *argv[], int &argp, const PhraseScorerFactory &ptf);
|
| 19 |
+
|
| 20 |
+
virtual void do_score_phrases();
|
| 21 |
+
virtual Score do_get_score(const PhraseTable::const_iterator &it);
|
| 22 |
+
|
| 23 |
+
Score get_weight(const String &s_src, const String &s_tgt) const;
|
| 24 |
+
Score get_weight(Count src, Count tgt) const;
|
| 25 |
+
|
| 26 |
+
typedef std::map<std::pair<Count,Count>, Score> WeightMapType_;
|
| 27 |
+
|
| 28 |
+
WeightMapType_ weight_map_;
|
| 29 |
+
|
| 30 |
+
// p(J|I) = probability of source-length J given target-length I
|
| 31 |
+
std::map<unsigned, std::map<unsigned, Score> > prob_srclen_tgtlen_;
|
| 32 |
+
|
| 33 |
+
Score get_noisy_or_combination(Count src_word, PhraseInfo &tgt_phrase);
|
| 34 |
+
|
| 35 |
+
PhraseScorer* black_box_scorer;
|
| 36 |
+
|
| 37 |
+
public:
|
| 38 |
+
static PhraseScorer *create_scorer(const char *argv[], int &argp, bool reverse, const PhraseScorerFactory &ptf);
|
| 39 |
+
};
|
| 40 |
+
|
| 41 |
+
#endif /* _LEXDECOM_H */
|
mosesdecoder/contrib/memscore/memscore.cpp
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#include <iostream>
|
| 6 |
+
#include <vector>
|
| 7 |
+
|
| 8 |
+
#include "phrasetable.h"
|
| 9 |
+
#include "scorer.h"
|
| 10 |
+
|
| 11 |
+
const char *progname;
|
| 12 |
+
|
| 13 |
+
typedef PhrasePairInfo::AlignmentVector::value_type VP;
|
| 14 |
+
|
| 15 |
+
bool cmp_counts(const VP &a1, const VP &a2);
|
| 16 |
+
int main(int argc, const char *argv[]);
|
| 17 |
+
|
| 18 |
+
bool cmp_counts(const VP &a1, const VP &a2)
|
| 19 |
+
{
|
| 20 |
+
return a1.second < a2.second;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
int main(int argc, const char *argv[])
|
| 24 |
+
{
|
| 25 |
+
progname = argv[0];
|
| 26 |
+
|
| 27 |
+
if(argc == 1) {
|
| 28 |
+
std::cerr << "No scorers specified." << std::endl;
|
| 29 |
+
usage();
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
MemoryPhraseTable pt;
|
| 33 |
+
PhraseScorerFactory psf(pt);
|
| 34 |
+
|
| 35 |
+
typedef std::vector<PhraseScorer *> ScorerList;
|
| 36 |
+
ScorerList scorers;
|
| 37 |
+
|
| 38 |
+
for(int argp = 1; argp < argc; ) {
|
| 39 |
+
bool reverse;
|
| 40 |
+
if(!strcmp(argv[argp], "-s"))
|
| 41 |
+
reverse = false;
|
| 42 |
+
else if(!strcmp(argv[argp], "-r"))
|
| 43 |
+
reverse = true;
|
| 44 |
+
else
|
| 45 |
+
usage();
|
| 46 |
+
|
| 47 |
+
scorers.push_back(psf.create_scorer(argv, ++argp, reverse));
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
pt.load_data(std::cin);
|
| 51 |
+
pt.compute_phrase_statistics();
|
| 52 |
+
|
| 53 |
+
for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s)
|
| 54 |
+
(*s)->score_phrases();
|
| 55 |
+
|
| 56 |
+
for(PhrasePairCounts::const_iterator it = pt.raw_begin(); it != pt.raw_end(); ++it) {
|
| 57 |
+
PhrasePairInfo ppi(it);
|
| 58 |
+
Phrase src = ppi.get_src();
|
| 59 |
+
Phrase tgt = ppi.get_tgt();
|
| 60 |
+
const PhrasePairInfo::AlignmentVector av = ppi.get_alignments();
|
| 61 |
+
|
| 62 |
+
PhraseAlignment alig = std::max_element(av.begin(), av.end(), cmp_counts)->first;
|
| 63 |
+
|
| 64 |
+
std::cout << pt.get_src_phrase(src) << " ||| " << pt.get_tgt_phrase(tgt) << " ||| " << alig << " |||";
|
| 65 |
+
|
| 66 |
+
for(ScorerList::iterator s = scorers.begin(); s != scorers.end(); ++s)
|
| 67 |
+
std::cout << ' ' << (*s)->get_score(it);
|
| 68 |
+
std::cout << '\n'; // don't use std::endl to avoid flushing
|
| 69 |
+
}
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
void usage()
|
| 73 |
+
{
|
| 74 |
+
std::cerr << "Usage: " << progname << " <scorer1> <scorer2> ..." << std::endl <<
|
| 75 |
+
" where each scorer is specified as" << std::endl <<
|
| 76 |
+
" -s <scorer> <args> to estimate p(s|t)" << std::endl <<
|
| 77 |
+
" -r <scorer> <args> to estimate p(t|s)" << std::endl << std::endl;
|
| 78 |
+
|
| 79 |
+
std::cerr << "Implemented scorers:" << std::endl;
|
| 80 |
+
|
| 81 |
+
const std::vector<String> &v = PhraseScorerFactory::scorer_list();
|
| 82 |
+
std::copy(v.begin(), v.end(), std::ostream_iterator<std::string>(std::cerr, "\n"));
|
| 83 |
+
|
| 84 |
+
exit(1);
|
| 85 |
+
}
|
mosesdecoder/contrib/memscore/memscore.h
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#ifndef MEMSCORE_H
|
| 6 |
+
#define MEMSCORE_H
|
| 7 |
+
|
| 8 |
+
#include <sstream>
|
| 9 |
+
#include <string>
|
| 10 |
+
#include <utility>
|
| 11 |
+
|
| 12 |
+
#include "config.h"
|
| 13 |
+
|
| 14 |
+
#ifndef HAVE_PTRDIFF_T
|
| 15 |
+
typedef long ptrdiff_t;
|
| 16 |
+
#endif
|
| 17 |
+
|
| 18 |
+
#ifdef __GNUC__
|
| 19 |
+
#define NORETURN __attribute__ ((noreturn))
|
| 20 |
+
#else
|
| 21 |
+
#define NORETURN
|
| 22 |
+
#endif
|
| 23 |
+
|
| 24 |
+
void usage() NORETURN;
|
| 25 |
+
|
| 26 |
+
typedef double Score;
|
| 27 |
+
typedef unsigned int Count;
|
| 28 |
+
typedef unsigned int Phrase;
|
| 29 |
+
typedef ptrdiff_t DataIndex;
|
| 30 |
+
typedef std::pair<Phrase,Phrase> PhrasePair;
|
| 31 |
+
typedef char *PhrasePairData;
|
| 32 |
+
typedef std::string String;
|
| 33 |
+
typedef std::istringstream IStringStream;
|
| 34 |
+
|
| 35 |
+
/* phrasetable.h */
|
| 36 |
+
|
| 37 |
+
class PhraseText;
|
| 38 |
+
class PhraseInfo;
|
| 39 |
+
class PhraseInfoList;
|
| 40 |
+
class PhraseAlignment;
|
| 41 |
+
class PhrasePairInfo;
|
| 42 |
+
class PhraseTable;
|
| 43 |
+
|
| 44 |
+
/* scorer.h */
|
| 45 |
+
|
| 46 |
+
class PhraseScorer;
|
| 47 |
+
|
| 48 |
+
/* statistic.h */
|
| 49 |
+
|
| 50 |
+
class PhraseStatistic;
|
| 51 |
+
|
| 52 |
+
/* IRSTLM */
|
| 53 |
+
|
| 54 |
+
class lmtable;
|
| 55 |
+
class ngram;
|
| 56 |
+
|
| 57 |
+
#endif
|
mosesdecoder/contrib/memscore/missing
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#! /bin/sh
|
| 2 |
+
# Common stub for a few missing GNU programs while installing.
|
| 3 |
+
|
| 4 |
+
scriptversion=2005-06-08.21
|
| 5 |
+
|
| 6 |
+
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005
|
| 7 |
+
# Free Software Foundation, Inc.
|
| 8 |
+
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
| 9 |
+
|
| 10 |
+
# This program is free software; you can redistribute it and/or modify
|
| 11 |
+
# it under the terms of the GNU General Public License as published by
|
| 12 |
+
# the Free Software Foundation; either version 2, or (at your option)
|
| 13 |
+
# any later version.
|
| 14 |
+
|
| 15 |
+
# This program is distributed in the hope that it will be useful,
|
| 16 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 17 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 18 |
+
# GNU General Public License for more details.
|
| 19 |
+
|
| 20 |
+
# You should have received a copy of the GNU General Public License
|
| 21 |
+
# along with this program; if not, write to the Free Software
|
| 22 |
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
| 23 |
+
# 02110-1301, USA.
|
| 24 |
+
|
| 25 |
+
# As a special exception to the GNU General Public License, if you
|
| 26 |
+
# distribute this file as part of a program that contains a
|
| 27 |
+
# configuration script generated by Autoconf, you may include it under
|
| 28 |
+
# the same distribution terms that you use for the rest of that program.
|
| 29 |
+
|
| 30 |
+
if test $# -eq 0; then
|
| 31 |
+
echo 1>&2 "Try \`$0 --help' for more information"
|
| 32 |
+
exit 1
|
| 33 |
+
fi
|
| 34 |
+
|
| 35 |
+
run=:
|
| 36 |
+
|
| 37 |
+
# In the cases where this matters, `missing' is being run in the
|
| 38 |
+
# srcdir already.
|
| 39 |
+
if test -f configure.ac; then
|
| 40 |
+
configure_ac=configure.ac
|
| 41 |
+
else
|
| 42 |
+
configure_ac=configure.in
|
| 43 |
+
fi
|
| 44 |
+
|
| 45 |
+
msg="missing on your system"
|
| 46 |
+
|
| 47 |
+
case "$1" in
|
| 48 |
+
--run)
|
| 49 |
+
# Try to run requested program, and just exit if it succeeds.
|
| 50 |
+
run=
|
| 51 |
+
shift
|
| 52 |
+
"$@" && exit 0
|
| 53 |
+
# Exit code 63 means version mismatch. This often happens
|
| 54 |
+
# when the user try to use an ancient version of a tool on
|
| 55 |
+
# a file that requires a minimum version. In this case we
|
| 56 |
+
# we should proceed has if the program had been absent, or
|
| 57 |
+
# if --run hadn't been passed.
|
| 58 |
+
if test $? = 63; then
|
| 59 |
+
run=:
|
| 60 |
+
msg="probably too old"
|
| 61 |
+
fi
|
| 62 |
+
;;
|
| 63 |
+
|
| 64 |
+
-h|--h|--he|--hel|--help)
|
| 65 |
+
echo "\
|
| 66 |
+
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
| 67 |
+
|
| 68 |
+
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
|
| 69 |
+
error status if there is no known handling for PROGRAM.
|
| 70 |
+
|
| 71 |
+
Options:
|
| 72 |
+
-h, --help display this help and exit
|
| 73 |
+
-v, --version output version information and exit
|
| 74 |
+
--run try to run the given command, and emulate it if it fails
|
| 75 |
+
|
| 76 |
+
Supported PROGRAM values:
|
| 77 |
+
aclocal touch file \`aclocal.m4'
|
| 78 |
+
autoconf touch file \`configure'
|
| 79 |
+
autoheader touch file \`config.h.in'
|
| 80 |
+
automake touch all \`Makefile.in' files
|
| 81 |
+
bison create \`y.tab.[ch]', if possible, from existing .[ch]
|
| 82 |
+
flex create \`lex.yy.c', if possible, from existing .c
|
| 83 |
+
help2man touch the output file
|
| 84 |
+
lex create \`lex.yy.c', if possible, from existing .c
|
| 85 |
+
makeinfo touch the output file
|
| 86 |
+
tar try tar, gnutar, gtar, then tar without non-portable flags
|
| 87 |
+
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
|
| 88 |
+
|
| 89 |
+
Send bug reports to <bug-automake@gnu.org>."
|
| 90 |
+
exit $?
|
| 91 |
+
;;
|
| 92 |
+
|
| 93 |
+
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
| 94 |
+
echo "missing $scriptversion (GNU Automake)"
|
| 95 |
+
exit $?
|
| 96 |
+
;;
|
| 97 |
+
|
| 98 |
+
-*)
|
| 99 |
+
echo 1>&2 "$0: Unknown \`$1' option"
|
| 100 |
+
echo 1>&2 "Try \`$0 --help' for more information"
|
| 101 |
+
exit 1
|
| 102 |
+
;;
|
| 103 |
+
|
| 104 |
+
esac
|
| 105 |
+
|
| 106 |
+
# Now exit if we have it, but it failed. Also exit now if we
|
| 107 |
+
# don't have it and --version was passed (most likely to detect
|
| 108 |
+
# the program).
|
| 109 |
+
case "$1" in
|
| 110 |
+
lex|yacc)
|
| 111 |
+
# Not GNU programs, they don't have --version.
|
| 112 |
+
;;
|
| 113 |
+
|
| 114 |
+
tar)
|
| 115 |
+
if test -n "$run"; then
|
| 116 |
+
echo 1>&2 "ERROR: \`tar' requires --run"
|
| 117 |
+
exit 1
|
| 118 |
+
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
| 119 |
+
exit 1
|
| 120 |
+
fi
|
| 121 |
+
;;
|
| 122 |
+
|
| 123 |
+
*)
|
| 124 |
+
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
|
| 125 |
+
# We have it, but it failed.
|
| 126 |
+
exit 1
|
| 127 |
+
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
| 128 |
+
# Could not run --version or --help. This is probably someone
|
| 129 |
+
# running `$TOOL --version' or `$TOOL --help' to check whether
|
| 130 |
+
# $TOOL exists and not knowing $TOOL uses missing.
|
| 131 |
+
exit 1
|
| 132 |
+
fi
|
| 133 |
+
;;
|
| 134 |
+
esac
|
| 135 |
+
|
| 136 |
+
# If it does not exist, or fails to run (possibly an outdated version),
|
| 137 |
+
# try to emulate it.
|
| 138 |
+
case "$1" in
|
| 139 |
+
aclocal*)
|
| 140 |
+
echo 1>&2 "\
|
| 141 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 142 |
+
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
|
| 143 |
+
to install the \`Automake' and \`Perl' packages. Grab them from
|
| 144 |
+
any GNU archive site."
|
| 145 |
+
touch aclocal.m4
|
| 146 |
+
;;
|
| 147 |
+
|
| 148 |
+
autoconf)
|
| 149 |
+
echo 1>&2 "\
|
| 150 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 151 |
+
you modified \`${configure_ac}'. You might want to install the
|
| 152 |
+
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
|
| 153 |
+
archive site."
|
| 154 |
+
touch configure
|
| 155 |
+
;;
|
| 156 |
+
|
| 157 |
+
autoheader)
|
| 158 |
+
echo 1>&2 "\
|
| 159 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 160 |
+
you modified \`acconfig.h' or \`${configure_ac}'. You might want
|
| 161 |
+
to install the \`Autoconf' and \`GNU m4' packages. Grab them
|
| 162 |
+
from any GNU archive site."
|
| 163 |
+
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
|
| 164 |
+
test -z "$files" && files="config.h"
|
| 165 |
+
touch_files=
|
| 166 |
+
for f in $files; do
|
| 167 |
+
case "$f" in
|
| 168 |
+
*:*) touch_files="$touch_files "`echo "$f" |
|
| 169 |
+
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
|
| 170 |
+
*) touch_files="$touch_files $f.in";;
|
| 171 |
+
esac
|
| 172 |
+
done
|
| 173 |
+
touch $touch_files
|
| 174 |
+
;;
|
| 175 |
+
|
| 176 |
+
automake*)
|
| 177 |
+
echo 1>&2 "\
|
| 178 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 179 |
+
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
|
| 180 |
+
You might want to install the \`Automake' and \`Perl' packages.
|
| 181 |
+
Grab them from any GNU archive site."
|
| 182 |
+
find . -type f -name Makefile.am -print |
|
| 183 |
+
sed 's/\.am$/.in/' |
|
| 184 |
+
while read f; do touch "$f"; done
|
| 185 |
+
;;
|
| 186 |
+
|
| 187 |
+
autom4te)
|
| 188 |
+
echo 1>&2 "\
|
| 189 |
+
WARNING: \`$1' is needed, but is $msg.
|
| 190 |
+
You might have modified some files without having the
|
| 191 |
+
proper tools for further handling them.
|
| 192 |
+
You can get \`$1' as part of \`Autoconf' from any GNU
|
| 193 |
+
archive site."
|
| 194 |
+
|
| 195 |
+
file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
|
| 196 |
+
test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
|
| 197 |
+
if test -f "$file"; then
|
| 198 |
+
touch $file
|
| 199 |
+
else
|
| 200 |
+
test -z "$file" || exec >$file
|
| 201 |
+
echo "#! /bin/sh"
|
| 202 |
+
echo "# Created by GNU Automake missing as a replacement of"
|
| 203 |
+
echo "# $ $@"
|
| 204 |
+
echo "exit 0"
|
| 205 |
+
chmod +x $file
|
| 206 |
+
exit 1
|
| 207 |
+
fi
|
| 208 |
+
;;
|
| 209 |
+
|
| 210 |
+
bison|yacc)
|
| 211 |
+
echo 1>&2 "\
|
| 212 |
+
WARNING: \`$1' $msg. You should only need it if
|
| 213 |
+
you modified a \`.y' file. You may need the \`Bison' package
|
| 214 |
+
in order for those modifications to take effect. You can get
|
| 215 |
+
\`Bison' from any GNU archive site."
|
| 216 |
+
rm -f y.tab.c y.tab.h
|
| 217 |
+
if [ $# -ne 1 ]; then
|
| 218 |
+
eval LASTARG="\${$#}"
|
| 219 |
+
case "$LASTARG" in
|
| 220 |
+
*.y)
|
| 221 |
+
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
|
| 222 |
+
if [ -f "$SRCFILE" ]; then
|
| 223 |
+
cp "$SRCFILE" y.tab.c
|
| 224 |
+
fi
|
| 225 |
+
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
|
| 226 |
+
if [ -f "$SRCFILE" ]; then
|
| 227 |
+
cp "$SRCFILE" y.tab.h
|
| 228 |
+
fi
|
| 229 |
+
;;
|
| 230 |
+
esac
|
| 231 |
+
fi
|
| 232 |
+
if [ ! -f y.tab.h ]; then
|
| 233 |
+
echo >y.tab.h
|
| 234 |
+
fi
|
| 235 |
+
if [ ! -f y.tab.c ]; then
|
| 236 |
+
echo 'main() { return 0; }' >y.tab.c
|
| 237 |
+
fi
|
| 238 |
+
;;
|
| 239 |
+
|
| 240 |
+
lex|flex)
|
| 241 |
+
echo 1>&2 "\
|
| 242 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 243 |
+
you modified a \`.l' file. You may need the \`Flex' package
|
| 244 |
+
in order for those modifications to take effect. You can get
|
| 245 |
+
\`Flex' from any GNU archive site."
|
| 246 |
+
rm -f lex.yy.c
|
| 247 |
+
if [ $# -ne 1 ]; then
|
| 248 |
+
eval LASTARG="\${$#}"
|
| 249 |
+
case "$LASTARG" in
|
| 250 |
+
*.l)
|
| 251 |
+
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
|
| 252 |
+
if [ -f "$SRCFILE" ]; then
|
| 253 |
+
cp "$SRCFILE" lex.yy.c
|
| 254 |
+
fi
|
| 255 |
+
;;
|
| 256 |
+
esac
|
| 257 |
+
fi
|
| 258 |
+
if [ ! -f lex.yy.c ]; then
|
| 259 |
+
echo 'main() { return 0; }' >lex.yy.c
|
| 260 |
+
fi
|
| 261 |
+
;;
|
| 262 |
+
|
| 263 |
+
help2man)
|
| 264 |
+
echo 1>&2 "\
|
| 265 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 266 |
+
you modified a dependency of a manual page. You may need the
|
| 267 |
+
\`Help2man' package in order for those modifications to take
|
| 268 |
+
effect. You can get \`Help2man' from any GNU archive site."
|
| 269 |
+
|
| 270 |
+
file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
|
| 271 |
+
if test -z "$file"; then
|
| 272 |
+
file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
|
| 273 |
+
fi
|
| 274 |
+
if [ -f "$file" ]; then
|
| 275 |
+
touch $file
|
| 276 |
+
else
|
| 277 |
+
test -z "$file" || exec >$file
|
| 278 |
+
echo ".ab help2man is required to generate this page"
|
| 279 |
+
exit 1
|
| 280 |
+
fi
|
| 281 |
+
;;
|
| 282 |
+
|
| 283 |
+
makeinfo)
|
| 284 |
+
echo 1>&2 "\
|
| 285 |
+
WARNING: \`$1' is $msg. You should only need it if
|
| 286 |
+
you modified a \`.texi' or \`.texinfo' file, or any other file
|
| 287 |
+
indirectly affecting the aspect of the manual. The spurious
|
| 288 |
+
call might also be the consequence of using a buggy \`make' (AIX,
|
| 289 |
+
DU, IRIX). You might want to install the \`Texinfo' package or
|
| 290 |
+
the \`GNU make' package. Grab either from any GNU archive site."
|
| 291 |
+
# The file to touch is that specified with -o ...
|
| 292 |
+
file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
|
| 293 |
+
if test -z "$file"; then
|
| 294 |
+
# ... or it is the one specified with @setfilename ...
|
| 295 |
+
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
|
| 296 |
+
file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $infile`
|
| 297 |
+
# ... or it is derived from the source name (dir/f.texi becomes f.info)
|
| 298 |
+
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
|
| 299 |
+
fi
|
| 300 |
+
# If the file does not exist, the user really needs makeinfo;
|
| 301 |
+
# let's fail without touching anything.
|
| 302 |
+
test -f $file || exit 1
|
| 303 |
+
touch $file
|
| 304 |
+
;;
|
| 305 |
+
|
| 306 |
+
tar)
|
| 307 |
+
shift
|
| 308 |
+
|
| 309 |
+
# We have already tried tar in the generic part.
|
| 310 |
+
# Look for gnutar/gtar before invocation to avoid ugly error
|
| 311 |
+
# messages.
|
| 312 |
+
if (gnutar --version > /dev/null 2>&1); then
|
| 313 |
+
gnutar "$@" && exit 0
|
| 314 |
+
fi
|
| 315 |
+
if (gtar --version > /dev/null 2>&1); then
|
| 316 |
+
gtar "$@" && exit 0
|
| 317 |
+
fi
|
| 318 |
+
firstarg="$1"
|
| 319 |
+
if shift; then
|
| 320 |
+
case "$firstarg" in
|
| 321 |
+
*o*)
|
| 322 |
+
firstarg=`echo "$firstarg" | sed s/o//`
|
| 323 |
+
tar "$firstarg" "$@" && exit 0
|
| 324 |
+
;;
|
| 325 |
+
esac
|
| 326 |
+
case "$firstarg" in
|
| 327 |
+
*h*)
|
| 328 |
+
firstarg=`echo "$firstarg" | sed s/h//`
|
| 329 |
+
tar "$firstarg" "$@" && exit 0
|
| 330 |
+
;;
|
| 331 |
+
esac
|
| 332 |
+
fi
|
| 333 |
+
|
| 334 |
+
echo 1>&2 "\
|
| 335 |
+
WARNING: I can't seem to be able to run \`tar' with the given arguments.
|
| 336 |
+
You may want to install GNU tar or Free paxutils, or check the
|
| 337 |
+
command line arguments."
|
| 338 |
+
exit 1
|
| 339 |
+
;;
|
| 340 |
+
|
| 341 |
+
*)
|
| 342 |
+
echo 1>&2 "\
|
| 343 |
+
WARNING: \`$1' is needed, and is $msg.
|
| 344 |
+
You might have modified some files without having the
|
| 345 |
+
proper tools for further handling them. Check the \`README' file,
|
| 346 |
+
it often tells you about the needed prerequisites for installing
|
| 347 |
+
this package. You may also peek at any GNU archive site, in case
|
| 348 |
+
some other package would contain this missing \`$1' program."
|
| 349 |
+
exit 1
|
| 350 |
+
;;
|
| 351 |
+
esac
|
| 352 |
+
|
| 353 |
+
exit 0
|
| 354 |
+
|
| 355 |
+
# Local variables:
|
| 356 |
+
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
| 357 |
+
# time-stamp-start: "scriptversion="
|
| 358 |
+
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
| 359 |
+
# time-stamp-end: "$"
|
| 360 |
+
# End:
|
mosesdecoder/contrib/memscore/phraselm.h
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#ifndef PHRASELM_H
|
| 6 |
+
#define PHRASELM_H
|
| 7 |
+
|
| 8 |
+
#include <cassert>
|
| 9 |
+
|
| 10 |
+
#include "memscore.h"
|
| 11 |
+
#include "phrasetable.h"
|
| 12 |
+
#include "statistic.h"
|
| 13 |
+
|
| 14 |
+
class lmtable;
|
| 15 |
+
|
| 16 |
+
class PhraseLanguageModel : public PhraseStatistic
|
| 17 |
+
{
|
| 18 |
+
protected:
|
| 19 |
+
String lmfile_;
|
| 20 |
+
Count score_idx_;
|
| 21 |
+
|
| 22 |
+
PhraseInfoList *phrase_info_list_;
|
| 23 |
+
|
| 24 |
+
void compute_lmscores(PhraseInfoList &phrase_info_list, bool closed_world);
|
| 25 |
+
|
| 26 |
+
public:
|
| 27 |
+
PhraseLanguageModel(String lmfile) : lmfile_(lmfile) {}
|
| 28 |
+
|
| 29 |
+
virtual void attach(PhraseInfoList &pilist);
|
| 30 |
+
virtual void compute_statistic();
|
| 31 |
+
|
| 32 |
+
virtual Score get_score(PhraseInfo &pi) {
|
| 33 |
+
assert(computation_done_);
|
| 34 |
+
return pi.data(score_idx_);
|
| 35 |
+
}
|
| 36 |
+
};
|
| 37 |
+
|
| 38 |
+
class ClosedPhraseLanguageModel : public PhraseLanguageModel
|
| 39 |
+
{
|
| 40 |
+
public:
|
| 41 |
+
ClosedPhraseLanguageModel(String lmfile) : PhraseLanguageModel(lmfile) {}
|
| 42 |
+
virtual void compute_statistic();
|
| 43 |
+
};
|
| 44 |
+
|
| 45 |
+
#endif
|
mosesdecoder/contrib/memscore/phrasetable.cpp
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#include "phrasetable.h"
|
| 6 |
+
#include "statistic.h"
|
| 7 |
+
#include "timestamp.h"
|
| 8 |
+
|
| 9 |
+
#include <iostream>
|
| 10 |
+
#include <limits>
|
| 11 |
+
#include <sstream>
|
| 12 |
+
#include <string>
|
| 13 |
+
|
| 14 |
+
/* PhraseText */
|
| 15 |
+
|
| 16 |
+
PhraseText::DictionaryType_ PhraseText::dictionary_;
|
| 17 |
+
Count PhraseText::last_id_ = 1;
|
| 18 |
+
|
| 19 |
+
PhraseText::PhraseText(const String &s)
|
| 20 |
+
{
|
| 21 |
+
IStringStream is(s);
|
| 22 |
+
while(is.good()) {
|
| 23 |
+
String w;
|
| 24 |
+
getline(is, w, ' ');
|
| 25 |
+
Count *id = boost::fast_pool_allocator<Count>::allocate(1);
|
| 26 |
+
*id = index_word(w);
|
| 27 |
+
word_list_.push_back(id);
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
std::ostream &operator<<(std::ostream &os, const PhraseText &pt)
|
| 32 |
+
{
|
| 33 |
+
bool print_space = false;
|
| 34 |
+
for(PhraseText::const_string_iterator it = pt.string_begin(); it != pt.string_end(); it++) {
|
| 35 |
+
if(print_space)
|
| 36 |
+
os << ' ';
|
| 37 |
+
else
|
| 38 |
+
print_space = true;
|
| 39 |
+
|
| 40 |
+
os << *it;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
return os;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
/* PhraseAlignment */
|
| 47 |
+
|
| 48 |
+
PhraseAlignment::Alignment::AlignmentMapType_ PhraseAlignment::Alignment::alignment_map_;
|
| 49 |
+
PhraseAlignment::Alignment::AlignmentVectorType_ PhraseAlignment::Alignment::alignment_vector_;
|
| 50 |
+
|
| 51 |
+
PhraseAlignment::Alignment::Alignment(Count slen, Count tlen, const String &alignment) :
|
| 52 |
+
slen_(slen), tlen_(tlen), matrix_(slen * tlen, false)
|
| 53 |
+
{
|
| 54 |
+
assert(slen_ > 0 && slen_ < 10);
|
| 55 |
+
IStringStream is(alignment);
|
| 56 |
+
while(is.good()) {
|
| 57 |
+
String a;
|
| 58 |
+
getline(is, a, ' ');
|
| 59 |
+
IStringStream ap(a);
|
| 60 |
+
Count s, t;
|
| 61 |
+
char dash;
|
| 62 |
+
ap >> s >> dash >> t;
|
| 63 |
+
assert(s < slen && t < tlen);
|
| 64 |
+
assert(dash == '-');
|
| 65 |
+
matrix_[t * slen + s] = true;
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
Count PhraseAlignment::Alignment::index_alignment(Count slen, Count tlen, const String &alignment)
|
| 70 |
+
{
|
| 71 |
+
AlignmentTuple_ tup = boost::make_tuple(slen, tlen, alignment);
|
| 72 |
+
AlignmentMapType_::const_iterator it = alignment_map_.find(tup);
|
| 73 |
+
|
| 74 |
+
if(it == alignment_map_.end()) {
|
| 75 |
+
const Alignment *pa = new Alignment(slen, tlen, alignment);
|
| 76 |
+
Count index = alignment_vector_.size();
|
| 77 |
+
alignment_map_.insert(std::make_pair(tup, index));
|
| 78 |
+
alignment_vector_.push_back(pa);
|
| 79 |
+
return index;
|
| 80 |
+
} else
|
| 81 |
+
return it->second;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
std::ostream &operator<<(std::ostream &os, const PhraseAlignment::Alignment &pa)
|
| 85 |
+
{
|
| 86 |
+
bool print_space = false;
|
| 87 |
+
for(Count i = 0; i < pa.matrix_.size(); i++) {
|
| 88 |
+
if(print_space)
|
| 89 |
+
os << ' ';
|
| 90 |
+
else
|
| 91 |
+
print_space = true;
|
| 92 |
+
|
| 93 |
+
os << (i / pa.slen_) << '-' << (i % pa.slen_);
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
return os;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
std::ostream &operator<<(std::ostream &os, const PhraseAlignment &pa)
|
| 100 |
+
{
|
| 101 |
+
for(Count s = 0; s < pa.get_source_length(); s++) {
|
| 102 |
+
os << '(';
|
| 103 |
+
bool print_comma = false;
|
| 104 |
+
for(Count t = 0; t < pa.get_target_length(); t++) {
|
| 105 |
+
if(pa.is_aligned(s, t)) {
|
| 106 |
+
if(print_comma)
|
| 107 |
+
os << ',';
|
| 108 |
+
else
|
| 109 |
+
print_comma = true;
|
| 110 |
+
|
| 111 |
+
os << t;
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
os << ") ";
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
os << "|||";
|
| 118 |
+
|
| 119 |
+
for(Count t = 0; t < pa.get_target_length(); t++) {
|
| 120 |
+
os << " (";
|
| 121 |
+
bool print_comma = false;
|
| 122 |
+
for(Count s = 0; s < pa.get_source_length(); s++) {
|
| 123 |
+
if(pa.is_aligned(s, t)) {
|
| 124 |
+
if(print_comma)
|
| 125 |
+
os << ',';
|
| 126 |
+
else
|
| 127 |
+
print_comma = true;
|
| 128 |
+
|
| 129 |
+
os << s;
|
| 130 |
+
}
|
| 131 |
+
}
|
| 132 |
+
os << ')';
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
return os;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
/* PhrasePairInfo */
|
| 139 |
+
|
| 140 |
+
bool PhrasePairInfo::init_phase_ = true;
|
| 141 |
+
Count PhrasePairInfo::data_ncounts_ = COUNT_FREE_IDX;
|
| 142 |
+
Count PhrasePairInfo::data_nscores_ = SCORE_FREE_IDX;
|
| 143 |
+
const Count PhrasePairInfo::CONTINUATION_BIT = 1 << (std::numeric_limits<Count>::digits - 1);
|
| 144 |
+
|
| 145 |
+
PhrasePairInfo::PhrasePairInfo(Count src, Count tgt, Count alignment, Count count) :
|
| 146 |
+
src_(src), tgt_(tgt), data_(NULL), reverse_(false)
|
| 147 |
+
{
|
| 148 |
+
init_phase_ = false;
|
| 149 |
+
realloc_data(1);
|
| 150 |
+
count_data(COUNT_COUNT_IDX) = count;
|
| 151 |
+
Count *aligd = alignment_data(0);
|
| 152 |
+
aligd[0] = alignment;
|
| 153 |
+
aligd[1] = count;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
DataIndex PhrasePairInfo::register_score_data(Count size)
|
| 157 |
+
{
|
| 158 |
+
assert(init_phase_);
|
| 159 |
+
|
| 160 |
+
Count start = data_nscores_;
|
| 161 |
+
data_nscores_ += size;
|
| 162 |
+
return start;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
DataIndex PhrasePairInfo::register_count_data(Count size)
|
| 166 |
+
{
|
| 167 |
+
assert(init_phase_);
|
| 168 |
+
|
| 169 |
+
Count start = data_ncounts_;
|
| 170 |
+
data_ncounts_ += size;
|
| 171 |
+
return start;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
PhrasePairInfo::AlignmentVector PhrasePairInfo::get_alignments() const
|
| 175 |
+
{
|
| 176 |
+
PhrasePairInfo::AlignmentVector vec;
|
| 177 |
+
|
| 178 |
+
Count i = 0;
|
| 179 |
+
bool last;
|
| 180 |
+
do {
|
| 181 |
+
const Count *aligd = alignment_data(i++);
|
| 182 |
+
last = !(aligd[0] & CONTINUATION_BIT);
|
| 183 |
+
Count alig = aligd[0] & ~CONTINUATION_BIT;
|
| 184 |
+
vec.push_back(std::make_pair(PhraseAlignment(alig, reverse_), aligd[1]));
|
| 185 |
+
} while(!last);
|
| 186 |
+
|
| 187 |
+
return vec;
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
void PhrasePairInfo::add_alignment(Count new_alignment)
|
| 191 |
+
{
|
| 192 |
+
Count i = 0;
|
| 193 |
+
bool last;
|
| 194 |
+
do {
|
| 195 |
+
Count *aligd = alignment_data(i++);
|
| 196 |
+
last = !(aligd[0] & CONTINUATION_BIT);
|
| 197 |
+
Count alig = aligd[0] & ~CONTINUATION_BIT;
|
| 198 |
+
if(alig == new_alignment) {
|
| 199 |
+
aligd[1]++;
|
| 200 |
+
return;
|
| 201 |
+
}
|
| 202 |
+
} while(!last);
|
| 203 |
+
|
| 204 |
+
realloc_data(i + 1);
|
| 205 |
+
|
| 206 |
+
Count *last_aligd = alignment_data(i - 1);
|
| 207 |
+
last_aligd[0] |= CONTINUATION_BIT;
|
| 208 |
+
|
| 209 |
+
Count *this_aligd = alignment_data(i);
|
| 210 |
+
this_aligd[0] = new_alignment;
|
| 211 |
+
this_aligd[1] = 1;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
// (Re)allocate the per-pair data block so it can hold 'nalignments'
// (alignment, count) pairs in addition to the fixed score/count arrays,
// copying over any existing contents. Blocks with up to 3 alignments
// come from size-specific boost pools shared across all instances;
// larger blocks come from the heap.
// NOTE(review): the release path below assumes the old block held
// exactly nalignments - 1 alignments (the only growth pattern used by
// add_alignment) -- confirm before calling with other sizes.
void PhrasePairInfo::realloc_data(Count nalignments)
{
	// One pool per small block size (1, 2, or 3 alignments), created lazily.
	static boost::pool<> *pool[3] = { NULL, NULL, NULL };

	size_t fixed_size = data_nscores_ * sizeof(Score) + data_ncounts_ * sizeof(Count);
	size_t new_data_size = fixed_size + COUNTS_PER_ALIGNMENT * nalignments * sizeof(Count);

	PhrasePairData new_data;
	if(nalignments <= 3) {
		if(!pool[nalignments - 1])
			pool[nalignments - 1] = new boost::pool<>(new_data_size);

		new_data = reinterpret_cast<PhrasePairData>(pool[nalignments - 1]->malloc());
	} else
		new_data = new char[new_data_size];

	if(data_) {
		// Copy the fixed-size score/count arrays wholesale, then walk the
		// alignment list entry by entry until the end marker (an entry
		// without CONTINUATION_BIT).
		memcpy(new_data, data_, fixed_size);
		Count i = 0;
		Count *old_aligd, *new_aligd;
		do {
			assert(i < nalignments);
			old_aligd = alignment_data(data_, i);
			new_aligd = alignment_data(new_data, i);
			new_aligd[0] = old_aligd[0];
			new_aligd[1] = old_aligd[1];
			i++;
		} while(old_aligd[0] & CONTINUATION_BIT);
		// Release the old block: it held nalignments - 1 alignments, so it
		// came from pool[nalignments - 2] iff nalignments - 1 <= 3.
		if(nalignments <= 4)
			pool[nalignments - 2]->free(data_);
		else
			delete[] data_;
	}

	data_ = new_data;
}
|
| 250 |
+
|
| 251 |
+
/* PhraseInfoList */
|
| 252 |
+
|
| 253 |
+
// Return the index of phrase s_phr, creating and registering a new
// PhraseInfo entry (allocated from phrase_info_pool_) if the phrase has
// not been seen before.
Phrase PhraseInfoList::index_phrase(const String &s_phr)
{
	IDMapType_::const_iterator it = idmap_.find(s_phr);
	if(it != idmap_.end())
		return it->second;

	PhraseInfo *pi = phrase_info_pool_.construct(data_size_, s_phr);

	list_.push_back(pi);
	// Compute the new index once and reuse it, instead of performing a
	// second map lookup via idmap_[s_phr] on return as before.
	Phrase idx = list_.size() - 1;
	idmap_[s_phr] = idx;
	return idx;
}
|
| 265 |
+
|
| 266 |
+
// Reserve 'size' slots of per-phrase data and return the offset at
// which the reserved region begins.
DataIndex PhraseInfoList::register_data(Count size)
{
	DataIndex first_slot = data_size_;
	data_size_ = data_size_ + size;
	return first_slot;
}
|
| 272 |
+
|
| 273 |
+
// Queue statistic s for a later compute_statistics() pass and give it a
// chance to register any per-phrase data slots it needs (s.attach()
// receives this list). The caller retains ownership of s.
void PhraseInfoList::attach_statistic(PhraseStatistic &s)
{
	statistics_.push_back(&s);
	s.attach(*this);
}
|
| 278 |
+
|
| 279 |
+
void PhraseInfoList::compute_statistics()
|
| 280 |
+
{
|
| 281 |
+
while(!statistics_.empty()) {
|
| 282 |
+
statistics_.front()->compute_statistic();
|
| 283 |
+
statistics_.pop_front();
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
/* PhraseTable */
|
| 288 |
+
|
| 289 |
+
void MemoryPhraseTable::load_data(std::istream &instream)
|
| 290 |
+
{
|
| 291 |
+
Count total_count = 0;
|
| 292 |
+
|
| 293 |
+
Timestamp t_load;
|
| 294 |
+
Count nlines = 1;
|
| 295 |
+
String line;
|
| 296 |
+
while(getline(instream, line)) {
|
| 297 |
+
size_t sep1 = line.find(" ||| ");
|
| 298 |
+
if(sep1 == line.npos) {
|
| 299 |
+
std::cerr << "Phrase separator not found in: " << line << std::endl;
|
| 300 |
+
abort();
|
| 301 |
+
}
|
| 302 |
+
size_t sep2 = line.find(" ||| ", sep1 + 1);
|
| 303 |
+
String s_src(line, 0, sep1);
|
| 304 |
+
String s_tgt(line, sep1 + 5, sep2 - sep1 - 5);
|
| 305 |
+
String s_alignment(line, sep2 + 5);
|
| 306 |
+
|
| 307 |
+
Phrase src = src_info_.index_phrase(s_src);
|
| 308 |
+
Phrase tgt = tgt_info_.index_phrase(s_tgt);
|
| 309 |
+
Count alignment = PhraseAlignment::index_alignment(src_info_[src].get_phrase().size(), tgt_info_[tgt].get_phrase().size(), s_alignment);
|
| 310 |
+
|
| 311 |
+
src_info_[src].inc_count();
|
| 312 |
+
tgt_info_[tgt].inc_count();
|
| 313 |
+
total_count++;
|
| 314 |
+
|
| 315 |
+
PhrasePair stpair(src, tgt);
|
| 316 |
+
PhrasePairCounts::iterator it = joint_counts_.find(stpair);
|
| 317 |
+
|
| 318 |
+
if(it == joint_counts_.end()) {
|
| 319 |
+
src_info_[src].inc_distinct();
|
| 320 |
+
tgt_info_[tgt].inc_distinct();
|
| 321 |
+
joint_counts_.insert(std::make_pair(stpair, PhrasePairInfo(src, tgt, alignment, 1).get_phrase_pair_data()));
|
| 322 |
+
} else {
|
| 323 |
+
PhrasePairInfo pi(src, tgt, it->second);
|
| 324 |
+
pi.inc_count();
|
| 325 |
+
pi.add_alignment(alignment);
|
| 326 |
+
it->second = pi.get_phrase_pair_data(); // may have changed by adding the alignment
|
| 327 |
+
}
|
| 328 |
+
if(nlines % 50000 == 0)
|
| 329 |
+
std:: cerr << "Read " << nlines << " lines in " << (t_load.elapsed_time() / 1000) << " ms." << std::endl;
|
| 330 |
+
nlines++;
|
| 331 |
+
}
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
// Attach statistic s to the source-side phrase list (delegates to
// PhraseInfoList::attach_statistic; s is queued and may register data).
void MemoryPhraseTable::attach_src_statistic(PhraseStatistic &s)
{
	src_info_.attach_statistic(s);
}
|
| 338 |
+
|
| 339 |
+
// Attach statistic s to the target-side phrase list (delegates to
// PhraseInfoList::attach_statistic; s is queued and may register data).
void MemoryPhraseTable::attach_tgt_statistic(PhraseStatistic &s)
{
	tgt_info_.attach_statistic(s);
}
|
| 343 |
+
|
| 344 |
+
// Run all statistics previously attached to either phrase list: first
// the source side, then the target side.
void MemoryPhraseTable::compute_phrase_statistics()
{
	src_info_.compute_statistics();
	tgt_info_.compute_statistics();
}
|
mosesdecoder/contrib/memscore/scorer.h
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#ifndef SCORER_H
|
| 6 |
+
#define SCORER_H
|
| 7 |
+
|
| 8 |
+
#include "memscore.h"
|
| 9 |
+
|
| 10 |
+
// Creates PhraseScorer instances by name from command-line arguments.
class PhraseScorerFactory
{
private:
	PhraseTable &phrase_table_;   // table that all created scorers operate on

public:
	explicit PhraseScorerFactory(PhraseTable &phrase_table) :
		phrase_table_(phrase_table) {}

	// Instantiate the scorer named at argv[argp]; argp is advanced past
	// any arguments the scorer consumes. 'reverse' selects the inverted
	// scoring direction. NOTE(review): the caller presumably owns the
	// returned object -- confirm against the implementation.
	PhraseScorer *create_scorer(const char *argv[], int &argp, bool reverse);

	PhraseTable &get_phrase_table() const {
		return phrase_table_;
	}

	// Names of all scorers known to create_scorer().
	static const std::vector<String> &scorer_list();
};
|
| 27 |
+
|
| 28 |
+
class PhraseScorer
|
| 29 |
+
{
|
| 30 |
+
protected:
|
| 31 |
+
PhraseTable &phrase_table_;
|
| 32 |
+
bool reverse_;
|
| 33 |
+
|
| 34 |
+
explicit PhraseScorer(PhraseTable &pt, bool reverse) :
|
| 35 |
+
phrase_table_(!reverse ? pt : pt.reverse()), reverse_(reverse) {}
|
| 36 |
+
|
| 37 |
+
PhraseTable::iterator get_pair(Phrase src, Phrase tgt) {
|
| 38 |
+
PhraseTable::iterator it = phrase_table_.find(std::make_pair(src, tgt));
|
| 39 |
+
assert(it != phrase_table_.end());
|
| 40 |
+
return it;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
private:
|
| 44 |
+
virtual void do_score_phrases() {}
|
| 45 |
+
|
| 46 |
+
virtual Score do_get_score(const PhraseTable::const_iterator &it) = 0;
|
| 47 |
+
|
| 48 |
+
public:
|
| 49 |
+
virtual ~PhraseScorer() {}
|
| 50 |
+
|
| 51 |
+
virtual Score get_discount() {}
|
| 52 |
+
|
| 53 |
+
void score_phrases() {
|
| 54 |
+
do_score_phrases();
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
Score get_score(const PhrasePairCounts::const_iterator &it) {
|
| 58 |
+
return do_get_score(phrase_table_.find(it));
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
Score get_score(const PhraseTable::const_iterator &it) {
|
| 62 |
+
return do_get_score(it);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
Score get_score(Phrase src, Phrase tgt) {
|
| 66 |
+
PhraseTable::const_iterator it = get_pair(src, tgt);
|
| 67 |
+
return do_get_score(it);
|
| 68 |
+
}
|
| 69 |
+
};
|
| 70 |
+
|
| 71 |
+
#endif
|
mosesdecoder/contrib/memscore/timestamp.h
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// memscore - in-memory phrase scoring for Statistical Machine Translation
|
| 2 |
+
// Christian Hardmeier, FBK-irst, Trento, 2010
|
| 3 |
+
// $Id$
|
| 4 |
+
|
| 5 |
+
#ifndef TIMESTAMP_H
|
| 6 |
+
#define TIMESTAMP_H
|
| 7 |
+
|
| 8 |
+
#include <sys/time.h>
|
| 9 |
+
|
| 10 |
+
class Timestamp
|
| 11 |
+
{
|
| 12 |
+
private:
|
| 13 |
+
struct timeval tv_;
|
| 14 |
+
|
| 15 |
+
public:
|
| 16 |
+
typedef double time_difference;
|
| 17 |
+
|
| 18 |
+
Timestamp() {
|
| 19 |
+
gettimeofday(&tv_, NULL);
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
time_difference elapsed_time() const {
|
| 23 |
+
struct timeval tv2;
|
| 24 |
+
gettimeofday(&tv2, NULL);
|
| 25 |
+
return (tv2.tv_sec - tv_.tv_sec) * 1e6 + (tv2.tv_usec - tv_.tv_usec);
|
| 26 |
+
}
|
| 27 |
+
};
|
| 28 |
+
|
| 29 |
+
#endif
|
mosesdecoder/contrib/mira/Main.cpp
ADDED
|
@@ -0,0 +1,1849 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include <algorithm>
|
| 21 |
+
#include <cstdlib>
|
| 22 |
+
#include <ctime>
|
| 23 |
+
#include <string>
|
| 24 |
+
#include <vector>
|
| 25 |
+
#include <map>
|
| 26 |
+
|
| 27 |
+
#include <boost/program_options.hpp>
|
| 28 |
+
#include <boost/algorithm/string.hpp>
|
| 29 |
+
|
| 30 |
+
#ifdef MPI_ENABLE
|
| 31 |
+
#include <boost/mpi.hpp>
|
| 32 |
+
namespace mpi = boost::mpi;
|
| 33 |
+
#endif
|
| 34 |
+
|
| 35 |
+
#include "Main.h"
|
| 36 |
+
#include "Optimiser.h"
|
| 37 |
+
#include "Hildreth.h"
|
| 38 |
+
#include "HypothesisQueue.h"
|
| 39 |
+
#include "moses/StaticData.h"
|
| 40 |
+
#include "moses/ScoreComponentCollection.h"
|
| 41 |
+
#include "moses/ThreadPool.h"
|
| 42 |
+
#include "mert/BleuScorer.h"
|
| 43 |
+
#include "moses/FeatureVector.h"
|
| 44 |
+
|
| 45 |
+
#include "moses/FF/WordTranslationFeature.h"
|
| 46 |
+
#include "moses/FF/PhrasePairFeature.h"
|
| 47 |
+
#include "moses/FF/WordPenaltyProducer.h"
|
| 48 |
+
#include "moses/LM/Base.h"
|
| 49 |
+
#include "util/random.hh"
|
| 50 |
+
|
| 51 |
+
using namespace Mira;
|
| 52 |
+
using namespace std;
|
| 53 |
+
using namespace Moses;
|
| 54 |
+
namespace po = boost::program_options;
|
| 55 |
+
|
| 56 |
+
int main(int argc, char** argv)
|
| 57 |
+
{
|
| 58 |
+
util::rand_init();
|
| 59 |
+
size_t rank = 0;
|
| 60 |
+
size_t size = 1;
|
| 61 |
+
#ifdef MPI_ENABLE
|
| 62 |
+
mpi::environment env(argc,argv);
|
| 63 |
+
mpi::communicator world;
|
| 64 |
+
rank = world.rank();
|
| 65 |
+
size = world.size();
|
| 66 |
+
#endif
|
| 67 |
+
|
| 68 |
+
bool help;
|
| 69 |
+
int verbosity;
|
| 70 |
+
string mosesConfigFile;
|
| 71 |
+
string inputFile;
|
| 72 |
+
vector<string> referenceFiles;
|
| 73 |
+
vector<string> mosesConfigFilesFolds, inputFilesFolds, referenceFilesFolds;
|
| 74 |
+
// string coreWeightFile, startWeightFile;
|
| 75 |
+
size_t epochs;
|
| 76 |
+
string learner;
|
| 77 |
+
bool shuffle;
|
| 78 |
+
size_t mixingFrequency;
|
| 79 |
+
size_t weightDumpFrequency;
|
| 80 |
+
string weightDumpStem;
|
| 81 |
+
bool scale_margin;
|
| 82 |
+
bool scale_update;
|
| 83 |
+
size_t n;
|
| 84 |
+
size_t batchSize;
|
| 85 |
+
bool distinctNbest;
|
| 86 |
+
bool accumulateWeights;
|
| 87 |
+
float historySmoothing;
|
| 88 |
+
bool scaleByInputLength, scaleByAvgInputLength;
|
| 89 |
+
bool scaleByInverseLength, scaleByAvgInverseLength;
|
| 90 |
+
float scaleByX;
|
| 91 |
+
float slack;
|
| 92 |
+
bool averageWeights;
|
| 93 |
+
bool weightConvergence;
|
| 94 |
+
float learning_rate;
|
| 95 |
+
float mira_learning_rate;
|
| 96 |
+
float perceptron_learning_rate;
|
| 97 |
+
string decoder_settings;
|
| 98 |
+
float min_weight_change;
|
| 99 |
+
bool normaliseWeights, normaliseMargin;
|
| 100 |
+
bool print_feature_values;
|
| 101 |
+
bool historyBleu ;
|
| 102 |
+
bool sentenceBleu;
|
| 103 |
+
bool perceptron_update;
|
| 104 |
+
bool hope_fear;
|
| 105 |
+
bool model_hope_fear;
|
| 106 |
+
size_t hope_n, fear_n;
|
| 107 |
+
size_t bleu_smoothing_scheme;
|
| 108 |
+
float min_oracle_bleu;
|
| 109 |
+
float minBleuRatio, maxBleuRatio;
|
| 110 |
+
bool boost;
|
| 111 |
+
bool decode_hope, decode_fear, decode_model;
|
| 112 |
+
string decode_filename;
|
| 113 |
+
bool batchEqualsShard;
|
| 114 |
+
bool sparseAverage, dumpMixedWeights, sparseNoAverage;
|
| 115 |
+
int featureCutoff;
|
| 116 |
+
bool pruneZeroWeights;
|
| 117 |
+
bool printFeatureCounts, printNbestWithFeatures;
|
| 118 |
+
bool avgRefLength;
|
| 119 |
+
bool print_weights, print_core_weights, debug_model, scale_lm, scale_wp;
|
| 120 |
+
float scale_lm_factor, scale_wp_factor;
|
| 121 |
+
bool kbest;
|
| 122 |
+
string moses_src;
|
| 123 |
+
float sigmoidParam;
|
| 124 |
+
float bleuWeight, bleuWeight_hope, bleuWeight_fear;
|
| 125 |
+
bool bleu_weight_lm;
|
| 126 |
+
float bleu_weight_lm_factor;
|
| 127 |
+
bool l1_regularize, l2_regularize, l1_reg_sparse, l2_reg_sparse;
|
| 128 |
+
float l1_lambda, l2_lambda;
|
| 129 |
+
bool most_violated, most_violated_reg, all_violated, max_bleu_diff;
|
| 130 |
+
bool feature_confidence, signed_counts;
|
| 131 |
+
float decay_core, decay_sparse, core_r0, sparse_r0;
|
| 132 |
+
float bleu_weight_fear_factor;
|
| 133 |
+
bool hildreth;
|
| 134 |
+
float add2lm;
|
| 135 |
+
|
| 136 |
+
// compute real sentence Bleu scores on complete translations, disable Bleu feature
|
| 137 |
+
bool realBleu, disableBleuFeature;
|
| 138 |
+
bool rescaleSlack;
|
| 139 |
+
bool makePairs;
|
| 140 |
+
bool debug;
|
| 141 |
+
bool reg_on_every_mix;
|
| 142 |
+
size_t continue_epoch;
|
| 143 |
+
bool modelPlusBleu, simpleHistoryBleu;
|
| 144 |
+
po::options_description desc("Allowed options");
|
| 145 |
+
desc.add_options()
|
| 146 |
+
("continue-epoch", po::value<size_t>(&continue_epoch)->default_value(0), "Continue an interrupted experiment from this epoch on")
|
| 147 |
+
("freq-reg", po::value<bool>(®_on_every_mix)->default_value(false), "Regularize after every weight mixing")
|
| 148 |
+
("l1sparse", po::value<bool>(&l1_reg_sparse)->default_value(true), "L1-regularization for sparse weights only")
|
| 149 |
+
("l2sparse", po::value<bool>(&l2_reg_sparse)->default_value(true), "L2-regularization for sparse weights only")
|
| 150 |
+
("mv-reg", po::value<bool>(&most_violated_reg)->default_value(false), "Regularize most violated constraint")
|
| 151 |
+
("most-violated", po::value<bool>(&most_violated)->default_value(false), "Add most violated constraint")
|
| 152 |
+
("all-violated", po::value<bool>(&all_violated)->default_value(false), "Add all violated constraints")
|
| 153 |
+
("feature-confidence", po::value<bool>(&feature_confidence)->default_value(false), "Confidence-weighted learning")
|
| 154 |
+
("signed-counts", po::value<bool>(&signed_counts)->default_value(false), "Use signed feature counts for CWL")
|
| 155 |
+
("dbg", po::value<bool>(&debug)->default_value(true), "More debug output")
|
| 156 |
+
("make-pairs", po::value<bool>(&makePairs)->default_value(true), "Make pairs of hypotheses for 1slack")
|
| 157 |
+
("debug", po::value<bool>(&debug)->default_value(true), "More debug output")
|
| 158 |
+
("rescale-slack", po::value<bool>(&rescaleSlack)->default_value(false), "Rescale slack in 1-slack formulation")
|
| 159 |
+
("add2lm", po::value<float>(&add2lm)->default_value(0.0), "Add the specified amount to all LM weights")
|
| 160 |
+
("hildreth", po::value<bool>(&hildreth)->default_value(false), "Prefer Hildreth over analytical update")
|
| 161 |
+
("model-plus-bleu", po::value<bool>(&modelPlusBleu)->default_value(false), "Use the sum of model score and +/- bleu to select hope and fear translations")
|
| 162 |
+
("simple-history-bleu", po::value<bool>(&simpleHistoryBleu)->default_value(false), "Simple history Bleu")
|
| 163 |
+
|
| 164 |
+
("bleu-weight", po::value<float>(&bleuWeight)->default_value(1.0), "Bleu weight used in decoder objective")
|
| 165 |
+
("bw-hope", po::value<float>(&bleuWeight_hope)->default_value(-1.0), "Bleu weight used in decoder objective for hope")
|
| 166 |
+
("bw-fear", po::value<float>(&bleuWeight_fear)->default_value(-1.0), "Bleu weight used in decoder objective for fear")
|
| 167 |
+
|
| 168 |
+
("core-r0", po::value<float>(&core_r0)->default_value(1.0), "Start learning rate for core features")
|
| 169 |
+
("sparse-r0", po::value<float>(&sparse_r0)->default_value(1.0), "Start learning rate for sparse features")
|
| 170 |
+
("decay-core", po::value<float>(&decay_core)->default_value(0.01), "Decay for core feature learning rate")
|
| 171 |
+
("decay-sparse", po::value<float>(&decay_sparse)->default_value(0.01), "Decay for sparse feature learning rate")
|
| 172 |
+
|
| 173 |
+
("tie-bw-to-lm", po::value<bool>(&bleu_weight_lm)->default_value(true), "Make bleu weight depend on lm weight")
|
| 174 |
+
("bw-lm-factor", po::value<float>(&bleu_weight_lm_factor)->default_value(2.0), "Make bleu weight depend on lm weight by this factor")
|
| 175 |
+
("bw-factor-fear", po::value<float>(&bleu_weight_fear_factor)->default_value(1.0), "Multiply fear weight by this factor")
|
| 176 |
+
("accumulate-weights", po::value<bool>(&accumulateWeights)->default_value(false), "Accumulate and average weights over all epochs")
|
| 177 |
+
("average-weights", po::value<bool>(&averageWeights)->default_value(false), "Set decoder weights to average weights after each update")
|
| 178 |
+
("avg-ref-length", po::value<bool>(&avgRefLength)->default_value(false), "Use average reference length instead of shortest for BLEU score feature")
|
| 179 |
+
("batch-equals-shard", po::value<bool>(&batchEqualsShard)->default_value(false), "Batch size is equal to shard size (purely batch)")
|
| 180 |
+
("batch-size,b", po::value<size_t>(&batchSize)->default_value(1), "Size of batch that is send to optimiser for weight adjustments")
|
| 181 |
+
("bleu-smoothing-scheme", po::value<size_t>(&bleu_smoothing_scheme)->default_value(1), "Set a smoothing scheme for sentence-Bleu: +1 (1), +0.1 (2), papineni (3) (default:1)")
|
| 182 |
+
("boost", po::value<bool>(&boost)->default_value(false), "Apply boosting factor to updates on misranked candidates")
|
| 183 |
+
("config,f", po::value<string>(&mosesConfigFile), "Moses ini-file")
|
| 184 |
+
("configs-folds", po::value<vector<string> >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold")
|
| 185 |
+
("debug-model", po::value<bool>(&debug_model)->default_value(false), "Get best model translation for debugging purposes")
|
| 186 |
+
("decode-hope", po::value<bool>(&decode_hope)->default_value(false), "Decode dev input set according to hope objective")
|
| 187 |
+
("decode-fear", po::value<bool>(&decode_fear)->default_value(false), "Decode dev input set according to fear objective")
|
| 188 |
+
("decode-model", po::value<bool>(&decode_model)->default_value(false), "Decode dev input set according to normal objective")
|
| 189 |
+
("decode-filename", po::value<string>(&decode_filename), "Filename for Bleu objective translations")
|
| 190 |
+
("decoder-settings", po::value<string>(&decoder_settings)->default_value(""), "Decoder settings for tuning runs")
|
| 191 |
+
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
|
| 192 |
+
("dump-mixed-weights", po::value<bool>(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights")
|
| 193 |
+
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
|
| 194 |
+
("feature-cutoff", po::value<int>(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features")
|
| 195 |
+
("fear-n", po::value<size_t>(&fear_n)->default_value(1), "Number of fear translations used")
|
| 196 |
+
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
|
| 197 |
+
("history-bleu", po::value<bool>(&historyBleu)->default_value(false), "Use 1best translations to update the history")
|
| 198 |
+
("history-smoothing", po::value<float>(&historySmoothing)->default_value(0.9), "Adjust the factor for history smoothing")
|
| 199 |
+
("hope-fear", po::value<bool>(&hope_fear)->default_value(true), "Use only hope and fear translations for optimisation (not model)")
|
| 200 |
+
("hope-n", po::value<size_t>(&hope_n)->default_value(2), "Number of hope translations used")
|
| 201 |
+
("input-file,i", po::value<string>(&inputFile), "Input file containing tokenised source")
|
| 202 |
+
("input-files-folds", po::value<vector<string> >(&inputFilesFolds), "Input files containing tokenised source, one for each fold")
|
| 203 |
+
("learner,l", po::value<string>(&learner)->default_value("mira"), "Learning algorithm")
|
| 204 |
+
("l1-lambda", po::value<float>(&l1_lambda)->default_value(0.0001), "Lambda for l1-regularization (w_i +/- lambda)")
|
| 205 |
+
("l2-lambda", po::value<float>(&l2_lambda)->default_value(0.01), "Lambda for l2-regularization (w_i * (1 - lambda))")
|
| 206 |
+
("l1-reg", po::value<bool>(&l1_regularize)->default_value(false), "L1-regularization")
|
| 207 |
+
("l2-reg", po::value<bool>(&l2_regularize)->default_value(false), "L2-regularization")
|
| 208 |
+
("min-bleu-ratio", po::value<float>(&minBleuRatio)->default_value(-1), "Set a minimum BLEU ratio between hope and fear")
|
| 209 |
+
("max-bleu-ratio", po::value<float>(&maxBleuRatio)->default_value(-1), "Set a maximum BLEU ratio between hope and fear")
|
| 210 |
+
("max-bleu-diff", po::value<bool>(&max_bleu_diff)->default_value(true), "Select hope/fear with maximum Bleu difference")
|
| 211 |
+
("min-oracle-bleu", po::value<float>(&min_oracle_bleu)->default_value(0), "Set a minimum oracle BLEU score")
|
| 212 |
+
("min-weight-change", po::value<float>(&min_weight_change)->default_value(0.0001), "Set minimum weight change for stopping criterion")
|
| 213 |
+
("mira-learning-rate", po::value<float>(&mira_learning_rate)->default_value(1), "Learning rate for MIRA (fixed or flexible)")
|
| 214 |
+
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(10), "How often per epoch to mix weights, when using mpi")
|
| 215 |
+
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation")
|
| 216 |
+
("moses-src", po::value<string>(&moses_src)->default_value(""), "Moses source directory")
|
| 217 |
+
("nbest,n", po::value<size_t>(&n)->default_value(30), "Number of translations in n-best list")
|
| 218 |
+
("normalise-weights", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
|
| 219 |
+
("normalise-margin", po::value<bool>(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1")
|
| 220 |
+
("perceptron-learning-rate", po::value<float>(&perceptron_learning_rate)->default_value(0.01), "Perceptron learning rate")
|
| 221 |
+
("print-feature-values", po::value<bool>(&print_feature_values)->default_value(false), "Print out feature values")
|
| 222 |
+
("print-feature-counts", po::value<bool>(&printFeatureCounts)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch")
|
| 223 |
+
("print-nbest-with-features", po::value<bool>(&printNbestWithFeatures)->default_value(false), "Print out feature values, print feature list with hope counts after 1st epoch")
|
| 224 |
+
("print-weights", po::value<bool>(&print_weights)->default_value(false), "Print out current weights")
|
| 225 |
+
("print-core-weights", po::value<bool>(&print_core_weights)->default_value(true), "Print out current core weights")
|
| 226 |
+
("prune-zero-weights", po::value<bool>(&pruneZeroWeights)->default_value(false), "Prune zero-valued sparse feature weights")
|
| 227 |
+
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
|
| 228 |
+
("reference-files-folds", po::value<vector<string> >(&referenceFilesFolds), "Reference translation files for training, one for each fold")
|
| 229 |
+
("kbest", po::value<bool>(&kbest)->default_value(true), "Select hope/fear pairs from a list of nbest translations")
|
| 230 |
+
|
| 231 |
+
("scale-by-inverse-length", po::value<bool>(&scaleByInverseLength)->default_value(false), "Scale BLEU by (history of) inverse input length")
|
| 232 |
+
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(true), "Scale BLEU by (history of) input length")
|
| 233 |
+
("scale-by-avg-input-length", po::value<bool>(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by average input length")
|
| 234 |
+
("scale-by-avg-inverse-length", po::value<bool>(&scaleByAvgInverseLength)->default_value(false), "Scale BLEU by average inverse input length")
|
| 235 |
+
("scale-by-x", po::value<float>(&scaleByX)->default_value(0.1), "Scale the BLEU score by value x")
|
| 236 |
+
("scale-lm", po::value<bool>(&scale_lm)->default_value(true), "Scale the language model feature")
|
| 237 |
+
("scale-factor-lm", po::value<float>(&scale_lm_factor)->default_value(0.5), "Scale the language model feature by this factor")
|
| 238 |
+
("scale-wp", po::value<bool>(&scale_wp)->default_value(false), "Scale the word penalty feature")
|
| 239 |
+
("scale-factor-wp", po::value<float>(&scale_wp_factor)->default_value(2), "Scale the word penalty feature by this factor")
|
| 240 |
+
("scale-margin", po::value<bool>(&scale_margin)->default_value(0), "Scale the margin by the Bleu score of the oracle translation")
|
| 241 |
+
("sentence-level-bleu", po::value<bool>(&sentenceBleu)->default_value(true), "Use a sentences level Bleu scoring function")
|
| 242 |
+
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
|
| 243 |
+
("sigmoid-param", po::value<float>(&sigmoidParam)->default_value(1), "y=sigmoidParam is the axis that this sigmoid approaches")
|
| 244 |
+
("slack", po::value<float>(&slack)->default_value(0.05), "Use slack in optimiser")
|
| 245 |
+
("sparse-average", po::value<bool>(&sparseAverage)->default_value(false), "Average weights by the number of processes")
|
| 246 |
+
("sparse-no-average", po::value<bool>(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum")
|
| 247 |
+
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
|
| 248 |
+
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
|
| 249 |
+
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(2), "How often per epoch to dump weights (mpi)")
|
| 250 |
+
("weight-dump-stem", po::value<string>(&weightDumpStem)->default_value("weights"), "Stem of filename to use for dumping weights");
|
| 251 |
+
|
| 252 |
+
po::options_description cmdline_options;
|
| 253 |
+
cmdline_options.add(desc);
|
| 254 |
+
po::variables_map vm;
|
| 255 |
+
po::store(po::command_line_parser(argc, argv). options(cmdline_options).run(), vm);
|
| 256 |
+
po::notify(vm);
|
| 257 |
+
|
| 258 |
+
if (help) {
|
| 259 |
+
std::cout << "Usage: " + string(argv[0])
|
| 260 |
+
+ " -f mosesini-file -i input-file -r reference-file(s) [options]" << std::endl;
|
| 261 |
+
std::cout << desc << std::endl;
|
| 262 |
+
return 0;
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
const StaticData &staticData = StaticData::Instance();
|
| 266 |
+
|
| 267 |
+
bool trainWithMultipleFolds = false;
|
| 268 |
+
if (mosesConfigFilesFolds.size() > 0 || inputFilesFolds.size() > 0 || referenceFilesFolds.size() > 0) {
|
| 269 |
+
if (rank == 0)
|
| 270 |
+
cerr << "Training with " << mosesConfigFilesFolds.size() << " folds" << endl;
|
| 271 |
+
trainWithMultipleFolds = true;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
if (dumpMixedWeights && (mixingFrequency != weightDumpFrequency)) {
|
| 275 |
+
cerr << "Set mixing frequency = weight dump frequency for dumping mixed weights!" << endl;
|
| 276 |
+
exit(1);
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
if ((sparseAverage || sparseNoAverage) && averageWeights) {
|
| 280 |
+
cerr << "Parameters --sparse-average 1/--sparse-no-average 1 and --average-weights 1 are incompatible (not implemented)" << endl;
|
| 281 |
+
exit(1);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
if (trainWithMultipleFolds) {
|
| 285 |
+
if (!mosesConfigFilesFolds.size()) {
|
| 286 |
+
cerr << "Error: No moses ini files specified for training with folds" << endl;
|
| 287 |
+
exit(1);
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
if (!inputFilesFolds.size()) {
|
| 291 |
+
cerr << "Error: No input files specified for training with folds" << endl;
|
| 292 |
+
exit(1);
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
if (!referenceFilesFolds.size()) {
|
| 296 |
+
cerr << "Error: No reference files specified for training with folds" << endl;
|
| 297 |
+
exit(1);
|
| 298 |
+
}
|
| 299 |
+
} else {
|
| 300 |
+
if (mosesConfigFile.empty()) {
|
| 301 |
+
cerr << "Error: No moses ini file specified" << endl;
|
| 302 |
+
return 1;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
if (inputFile.empty()) {
|
| 306 |
+
cerr << "Error: No input file specified" << endl;
|
| 307 |
+
return 1;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
if (!referenceFiles.size()) {
|
| 311 |
+
cerr << "Error: No reference files specified" << endl;
|
| 312 |
+
return 1;
|
| 313 |
+
}
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
// load input and references
|
| 317 |
+
vector<string> inputSentences;
|
| 318 |
+
size_t inputSize = trainWithMultipleFolds? inputFilesFolds.size(): 0;
|
| 319 |
+
size_t refSize = trainWithMultipleFolds? referenceFilesFolds.size(): referenceFiles.size();
|
| 320 |
+
vector<vector<string> > inputSentencesFolds(inputSize);
|
| 321 |
+
vector<vector<string> > referenceSentences(refSize);
|
| 322 |
+
|
| 323 |
+
// number of cores for each fold
|
| 324 |
+
size_t coresPerFold = 0, myFold = 0;
|
| 325 |
+
if (trainWithMultipleFolds) {
|
| 326 |
+
if (mosesConfigFilesFolds.size() > size) {
|
| 327 |
+
cerr << "Number of cores has to be a multiple of the number of folds" << endl;
|
| 328 |
+
exit(1);
|
| 329 |
+
}
|
| 330 |
+
coresPerFold = size/mosesConfigFilesFolds.size();
|
| 331 |
+
if (size % coresPerFold > 0) {
|
| 332 |
+
cerr << "Number of cores has to be a multiple of the number of folds" << endl;
|
| 333 |
+
exit(1);
|
| 334 |
+
}
|
| 335 |
+
|
| 336 |
+
if (rank == 0)
|
| 337 |
+
cerr << "Number of cores per fold: " << coresPerFold << endl;
|
| 338 |
+
myFold = rank/coresPerFold;
|
| 339 |
+
cerr << "Rank " << rank << ", my fold: " << myFold << endl;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
// NOTE: we do not actually need the references here, because we are reading them in from StaticData
|
| 343 |
+
if (trainWithMultipleFolds) {
|
| 344 |
+
if (!loadSentences(inputFilesFolds[myFold], inputSentencesFolds[myFold])) {
|
| 345 |
+
cerr << "Error: Failed to load input sentences from " << inputFilesFolds[myFold] << endl;
|
| 346 |
+
exit(1);
|
| 347 |
+
}
|
| 348 |
+
VERBOSE(1, "Rank " << rank << " reading inputs from " << inputFilesFolds[myFold] << endl);
|
| 349 |
+
|
| 350 |
+
if (!loadSentences(referenceFilesFolds[myFold], referenceSentences[myFold])) {
|
| 351 |
+
cerr << "Error: Failed to load reference sentences from " << referenceFilesFolds[myFold] << endl;
|
| 352 |
+
exit(1);
|
| 353 |
+
}
|
| 354 |
+
if (referenceSentences[myFold].size() != inputSentencesFolds[myFold].size()) {
|
| 355 |
+
cerr << "Error: Input file length (" << inputSentencesFolds[myFold].size() << ") != ("
|
| 356 |
+
<< referenceSentences[myFold].size() << ") reference file length (rank " << rank << ")" << endl;
|
| 357 |
+
exit(1);
|
| 358 |
+
}
|
| 359 |
+
VERBOSE(1, "Rank " << rank << " reading references from " << referenceFilesFolds[myFold] << endl);
|
| 360 |
+
} else {
|
| 361 |
+
if (!loadSentences(inputFile, inputSentences)) {
|
| 362 |
+
cerr << "Error: Failed to load input sentences from " << inputFile << endl;
|
| 363 |
+
return 1;
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
for (size_t i = 0; i < referenceFiles.size(); ++i) {
|
| 367 |
+
if (!loadSentences(referenceFiles[i], referenceSentences[i])) {
|
| 368 |
+
cerr << "Error: Failed to load reference sentences from "
|
| 369 |
+
<< referenceFiles[i] << endl;
|
| 370 |
+
return 1;
|
| 371 |
+
}
|
| 372 |
+
if (referenceSentences[i].size() != inputSentences.size()) {
|
| 373 |
+
cerr << "Error: Input file length (" << inputSentences.size() << ") != ("
|
| 374 |
+
<< referenceSentences[i].size() << ") length of reference file " << i
|
| 375 |
+
<< endl;
|
| 376 |
+
return 1;
|
| 377 |
+
}
|
| 378 |
+
}
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
if (scaleByAvgInputLength || scaleByInverseLength || scaleByAvgInverseLength)
|
| 382 |
+
scaleByInputLength = false;
|
| 383 |
+
|
| 384 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 385 |
+
sentenceBleu = false;
|
| 386 |
+
cerr << "Using history Bleu. " << endl;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
if (kbest) {
|
| 390 |
+
realBleu = true;
|
| 391 |
+
disableBleuFeature = true;
|
| 392 |
+
cerr << "Use kbest lists and real Bleu scores, disable Bleu feature.." << endl;
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
// initialise Moses
|
| 396 |
+
// add references to initialize Bleu feature
|
| 397 |
+
boost::trim(decoder_settings);
|
| 398 |
+
decoder_settings += " -mira -n-best-list - " + boost::lexical_cast<string>(n) + " distinct";
|
| 399 |
+
|
| 400 |
+
vector<string> decoder_params;
|
| 401 |
+
boost::split(decoder_params, decoder_settings, boost::is_any_of("\t "));
|
| 402 |
+
|
| 403 |
+
// bleu feature
|
| 404 |
+
decoder_params.push_back("-feature-add");
|
| 405 |
+
|
| 406 |
+
decoder_settings = "BleuScoreFeature tuneable=false references=";
|
| 407 |
+
if (trainWithMultipleFolds) {
|
| 408 |
+
decoder_settings += referenceFilesFolds[myFold];
|
| 409 |
+
} else {
|
| 410 |
+
decoder_settings += referenceFiles[0];
|
| 411 |
+
for (size_t i=1; i < referenceFiles.size(); ++i) {
|
| 412 |
+
decoder_settings += ",";
|
| 413 |
+
decoder_settings += referenceFiles[i];
|
| 414 |
+
}
|
| 415 |
+
}
|
| 416 |
+
decoder_params.push_back(decoder_settings);
|
| 417 |
+
|
| 418 |
+
string configFile = trainWithMultipleFolds? mosesConfigFilesFolds[myFold] : mosesConfigFile;
|
| 419 |
+
VERBOSE(1, "Rank " << rank << " reading config file from " << configFile << endl);
|
| 420 |
+
MosesDecoder* decoder = new MosesDecoder(configFile, verbosity, decoder_params.size(), decoder_params);
|
| 421 |
+
decoder->setBleuParameters(disableBleuFeature, sentenceBleu, scaleByInputLength, scaleByAvgInputLength,
|
| 422 |
+
scaleByInverseLength, scaleByAvgInverseLength,
|
| 423 |
+
scaleByX, historySmoothing, bleu_smoothing_scheme, simpleHistoryBleu);
|
| 424 |
+
bool chartDecoding = staticData.IsChart();
|
| 425 |
+
|
| 426 |
+
// Optionally shuffle the sentences
|
| 427 |
+
vector<size_t> order;
|
| 428 |
+
if (trainWithMultipleFolds) {
|
| 429 |
+
for (size_t i = 0; i < inputSentencesFolds[myFold].size(); ++i) {
|
| 430 |
+
order.push_back(i);
|
| 431 |
+
}
|
| 432 |
+
} else {
|
| 433 |
+
if (rank == 0) {
|
| 434 |
+
for (size_t i = 0; i < inputSentences.size(); ++i) {
|
| 435 |
+
order.push_back(i);
|
| 436 |
+
}
|
| 437 |
+
}
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
// initialise optimizer
|
| 441 |
+
Optimiser* optimiser = NULL;
|
| 442 |
+
if (learner == "mira") {
|
| 443 |
+
if (rank == 0) {
|
| 444 |
+
cerr << "Optimising using Mira" << endl;
|
| 445 |
+
cerr << "slack: " << slack << ", learning rate: " << mira_learning_rate << endl;
|
| 446 |
+
if (normaliseMargin)
|
| 447 |
+
cerr << "sigmoid parameter: " << sigmoidParam << endl;
|
| 448 |
+
}
|
| 449 |
+
optimiser = new MiraOptimiser(slack, scale_margin, scale_update, boost, normaliseMargin, sigmoidParam);
|
| 450 |
+
learning_rate = mira_learning_rate;
|
| 451 |
+
perceptron_update = false;
|
| 452 |
+
} else if (learner == "perceptron") {
|
| 453 |
+
if (rank == 0) {
|
| 454 |
+
cerr << "Optimising using Perceptron" << endl;
|
| 455 |
+
}
|
| 456 |
+
optimiser = new Perceptron();
|
| 457 |
+
learning_rate = perceptron_learning_rate;
|
| 458 |
+
perceptron_update = true;
|
| 459 |
+
model_hope_fear = false; // mira only
|
| 460 |
+
hope_fear = false; // mira only
|
| 461 |
+
n = 1;
|
| 462 |
+
hope_n = 1;
|
| 463 |
+
fear_n = 1;
|
| 464 |
+
} else {
|
| 465 |
+
cerr << "Error: Unknown optimiser: " << learner << endl;
|
| 466 |
+
return 1;
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
// resolve parameter dependencies
|
| 470 |
+
if (batchSize > 1 && perceptron_update) {
|
| 471 |
+
batchSize = 1;
|
| 472 |
+
cerr << "Info: Setting batch size to 1 for perceptron update" << endl;
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
if (hope_n == 0)
|
| 476 |
+
hope_n = n;
|
| 477 |
+
if (fear_n == 0)
|
| 478 |
+
fear_n = n;
|
| 479 |
+
|
| 480 |
+
if (model_hope_fear || kbest)
|
| 481 |
+
hope_fear = false; // is true by default
|
| 482 |
+
if (learner == "mira" && !(hope_fear || model_hope_fear || kbest)) {
|
| 483 |
+
cerr << "Error: Need to select one of parameters --hope-fear/--model-hope-fear/--kbest for mira update." << endl;
|
| 484 |
+
return 1;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
#ifdef MPI_ENABLE
|
| 488 |
+
if (!trainWithMultipleFolds)
|
| 489 |
+
mpi::broadcast(world, order, 0);
|
| 490 |
+
#endif
|
| 491 |
+
|
| 492 |
+
// Create shards according to the number of processes used
|
| 493 |
+
vector<size_t> shard;
|
| 494 |
+
if (trainWithMultipleFolds) {
|
| 495 |
+
size_t shardSize = order.size()/coresPerFold;
|
| 496 |
+
size_t shardStart = (size_t) (shardSize * (rank % coresPerFold));
|
| 497 |
+
size_t shardEnd = shardStart + shardSize;
|
| 498 |
+
if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold
|
| 499 |
+
shardEnd = order.size();
|
| 500 |
+
shardSize = shardEnd - shardStart;
|
| 501 |
+
}
|
| 502 |
+
VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl);
|
| 503 |
+
VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl);
|
| 504 |
+
shard.resize(shardSize);
|
| 505 |
+
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
| 506 |
+
batchSize = 1;
|
| 507 |
+
} else {
|
| 508 |
+
size_t shardSize = order.size() / size;
|
| 509 |
+
size_t shardStart = (size_t) (shardSize * rank);
|
| 510 |
+
size_t shardEnd = (size_t) (shardSize * (rank + 1));
|
| 511 |
+
if (rank == size - 1) {
|
| 512 |
+
shardEnd = order.size();
|
| 513 |
+
shardSize = shardEnd - shardStart;
|
| 514 |
+
}
|
| 515 |
+
VERBOSE(1, "Rank: " << rank << " Shard size: " << shardSize << endl);
|
| 516 |
+
VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl);
|
| 517 |
+
shard.resize(shardSize);
|
| 518 |
+
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
| 519 |
+
if (batchEqualsShard)
|
| 520 |
+
batchSize = shardSize;
|
| 521 |
+
}
|
| 522 |
+
|
| 523 |
+
// get reference to feature functions
|
| 524 |
+
// const vector<FeatureFunction*> &featureFunctions = FeatureFunction::GetFeatureFunctions();
|
| 525 |
+
ScoreComponentCollection initialWeights = decoder->getWeights();
|
| 526 |
+
|
| 527 |
+
if (add2lm != 0) {
|
| 528 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 529 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 530 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 531 |
+
const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
|
| 532 |
+
|
| 533 |
+
if (lm) {
|
| 534 |
+
float lmWeight = initialWeights.GetScoreForProducer(lm) + add2lm;
|
| 535 |
+
initialWeights.Assign(lm, lmWeight);
|
| 536 |
+
cerr << "Rank " << rank << ", add " << add2lm << " to lm weight." << endl;
|
| 537 |
+
}
|
| 538 |
+
}
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
if (normaliseWeights) {
|
| 542 |
+
initialWeights.L1Normalise();
|
| 543 |
+
cerr << "Rank " << rank << ", normalised initial weights: " << initialWeights << endl;
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
decoder->setWeights(initialWeights);
|
| 547 |
+
|
| 548 |
+
// set bleu weight to twice the size of the language model weight(s)
|
| 549 |
+
if (bleu_weight_lm) {
|
| 550 |
+
float lmSum = 0;
|
| 551 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 552 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 553 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 554 |
+
const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
|
| 555 |
+
|
| 556 |
+
if (lm) {
|
| 557 |
+
lmSum += abs(initialWeights.GetScoreForProducer(lm));
|
| 558 |
+
}
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
bleuWeight = lmSum * bleu_weight_lm_factor;
|
| 562 |
+
if (!kbest) cerr << "Set bleu weight to lm weight * " << bleu_weight_lm_factor << endl;
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
// bleu weights can be set separately for hope and fear; otherwise they are both set to 'lm weight * bleu_weight_lm_factor'
|
| 566 |
+
if (bleuWeight_hope == -1) {
|
| 567 |
+
bleuWeight_hope = bleuWeight;
|
| 568 |
+
}
|
| 569 |
+
if (bleuWeight_fear == -1) {
|
| 570 |
+
bleuWeight_fear = bleuWeight;
|
| 571 |
+
}
|
| 572 |
+
bleuWeight_fear *= bleu_weight_fear_factor;
|
| 573 |
+
if (!kbest) {
|
| 574 |
+
cerr << "Bleu weight: " << bleuWeight << endl;
|
| 575 |
+
cerr << "Bleu weight fear: " << bleuWeight_fear << endl;
|
| 576 |
+
}
|
| 577 |
+
|
| 578 |
+
if (decode_hope || decode_fear || decode_model) {
|
| 579 |
+
size_t decode = 1;
|
| 580 |
+
if (decode_fear) decode = 2;
|
| 581 |
+
if (decode_model) decode = 3;
|
| 582 |
+
decodeHopeOrFear(rank, size, decode, decode_filename, inputSentences, decoder, n, bleuWeight);
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
//Main loop:
|
| 586 |
+
ScoreComponentCollection cumulativeWeights; // collect weights per epoch to produce an average
|
| 587 |
+
ScoreComponentCollection cumulativeWeightsBinary;
|
| 588 |
+
size_t numberOfUpdates = 0;
|
| 589 |
+
size_t numberOfUpdatesThisEpoch = 0;
|
| 590 |
+
|
| 591 |
+
time_t now;
|
| 592 |
+
time(&now);
|
| 593 |
+
cerr << "Rank " << rank << ", " << ctime(&now);
|
| 594 |
+
|
| 595 |
+
float avgInputLength = 0;
|
| 596 |
+
float sumOfInputs = 0;
|
| 597 |
+
size_t numberOfInputs = 0;
|
| 598 |
+
|
| 599 |
+
ScoreComponentCollection mixedWeights;
|
| 600 |
+
ScoreComponentCollection mixedWeightsPrevious;
|
| 601 |
+
ScoreComponentCollection mixedWeightsBeforePrevious;
|
| 602 |
+
ScoreComponentCollection mixedAverageWeights;
|
| 603 |
+
ScoreComponentCollection mixedAverageWeightsPrevious;
|
| 604 |
+
ScoreComponentCollection mixedAverageWeightsBeforePrevious;
|
| 605 |
+
|
| 606 |
+
bool stop = false;
|
| 607 |
+
// int sumStillViolatedConstraints;
|
| 608 |
+
float epsilon = 0.0001;
|
| 609 |
+
|
| 610 |
+
// Variables for feature confidence
|
| 611 |
+
ScoreComponentCollection confidenceCounts, mixedConfidenceCounts, featureLearningRates;
|
| 612 |
+
featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0); //initialise core learning rates
|
| 613 |
+
cerr << "Initial learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl;
|
| 614 |
+
|
| 615 |
+
for (size_t epoch = continue_epoch; epoch < epochs && !stop; ++epoch) {
|
| 616 |
+
if (shuffle) {
|
| 617 |
+
if (trainWithMultipleFolds || rank == 0) {
|
| 618 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", shuffling input sentences.." << endl;
|
| 619 |
+
RandomIndex rindex;
|
| 620 |
+
random_shuffle(order.begin(), order.end(), rindex);
|
| 621 |
+
}
|
| 622 |
+
|
| 623 |
+
#ifdef MPI_ENABLE
|
| 624 |
+
if (!trainWithMultipleFolds)
|
| 625 |
+
mpi::broadcast(world, order, 0);
|
| 626 |
+
#endif
|
| 627 |
+
|
| 628 |
+
// redo shards
|
| 629 |
+
if (trainWithMultipleFolds) {
|
| 630 |
+
size_t shardSize = order.size()/coresPerFold;
|
| 631 |
+
size_t shardStart = (size_t) (shardSize * (rank % coresPerFold));
|
| 632 |
+
size_t shardEnd = shardStart + shardSize;
|
| 633 |
+
if (rank % coresPerFold == coresPerFold - 1) { // last rank of each fold
|
| 634 |
+
shardEnd = order.size();
|
| 635 |
+
shardSize = shardEnd - shardStart;
|
| 636 |
+
}
|
| 637 |
+
VERBOSE(1, "Rank: " << rank << ", shard size: " << shardSize << endl);
|
| 638 |
+
VERBOSE(1, "Rank: " << rank << ", shard start: " << shardStart << " shard end: " << shardEnd << endl);
|
| 639 |
+
shard.resize(shardSize);
|
| 640 |
+
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
| 641 |
+
batchSize = 1;
|
| 642 |
+
} else {
|
| 643 |
+
size_t shardSize = order.size()/size;
|
| 644 |
+
size_t shardStart = (size_t) (shardSize * rank);
|
| 645 |
+
size_t shardEnd = (size_t) (shardSize * (rank + 1));
|
| 646 |
+
if (rank == size - 1) {
|
| 647 |
+
shardEnd = order.size();
|
| 648 |
+
shardSize = shardEnd - shardStart;
|
| 649 |
+
}
|
| 650 |
+
VERBOSE(1, "Shard size: " << shardSize << endl);
|
| 651 |
+
VERBOSE(1, "Rank: " << rank << " Shard start: " << shardStart << " Shard end: " << shardEnd << endl);
|
| 652 |
+
shard.resize(shardSize);
|
| 653 |
+
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
| 654 |
+
if (batchEqualsShard)
|
| 655 |
+
batchSize = shardSize;
|
| 656 |
+
}
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
// sum of violated constraints in an epoch
|
| 660 |
+
// sumStillViolatedConstraints = 0;
|
| 661 |
+
|
| 662 |
+
numberOfUpdatesThisEpoch = 0;
|
| 663 |
+
// Sum up weights over one epoch, final average uses weights from last epoch
|
| 664 |
+
if (!accumulateWeights) {
|
| 665 |
+
cumulativeWeights.ZeroAll();
|
| 666 |
+
cumulativeWeightsBinary.ZeroAll();
|
| 667 |
+
}
|
| 668 |
+
|
| 669 |
+
// number of weight dumps this epoch
|
| 670 |
+
size_t weightMixingThisEpoch = 0;
|
| 671 |
+
size_t weightEpochDump = 0;
|
| 672 |
+
|
| 673 |
+
size_t shardPosition = 0;
|
| 674 |
+
vector<size_t>::const_iterator sid = shard.begin();
|
| 675 |
+
while (sid != shard.end()) {
|
| 676 |
+
// feature values for hypotheses i,j (matrix: batchSize x 3*n x featureValues)
|
| 677 |
+
vector<vector<ScoreComponentCollection> > featureValues;
|
| 678 |
+
vector<vector<float> > bleuScores;
|
| 679 |
+
vector<vector<float> > modelScores;
|
| 680 |
+
|
| 681 |
+
// variables for hope-fear/perceptron setting
|
| 682 |
+
vector<vector<ScoreComponentCollection> > featureValuesHope;
|
| 683 |
+
vector<vector<ScoreComponentCollection> > featureValuesFear;
|
| 684 |
+
vector<vector<float> > bleuScoresHope;
|
| 685 |
+
vector<vector<float> > bleuScoresFear;
|
| 686 |
+
vector<vector<float> > modelScoresHope;
|
| 687 |
+
vector<vector<float> > modelScoresFear;
|
| 688 |
+
|
| 689 |
+
// get moses weights
|
| 690 |
+
ScoreComponentCollection mosesWeights = decoder->getWeights();
|
| 691 |
+
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", weights: " << mosesWeights << endl);
|
| 692 |
+
|
| 693 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 694 |
+
decoder->printBleuFeatureHistory(cerr);
|
| 695 |
+
}
|
| 696 |
+
|
| 697 |
+
// BATCHING: produce nbest lists for all input sentences in batch
|
| 698 |
+
vector<float> oracleBleuScores;
|
| 699 |
+
vector<float> oracleModelScores;
|
| 700 |
+
vector<vector<const Word*> > oneBests;
|
| 701 |
+
vector<ScoreComponentCollection> oracleFeatureValues;
|
| 702 |
+
vector<size_t> inputLengths;
|
| 703 |
+
vector<size_t> ref_ids;
|
| 704 |
+
size_t actualBatchSize = 0;
|
| 705 |
+
|
| 706 |
+
size_t examples_in_batch = 0;
|
| 707 |
+
bool skip_example = false;
|
| 708 |
+
for (size_t batchPosition = 0; batchPosition < batchSize && sid
|
| 709 |
+
!= shard.end(); ++batchPosition) {
|
| 710 |
+
string input;
|
| 711 |
+
if (trainWithMultipleFolds)
|
| 712 |
+
input = inputSentencesFolds[myFold][*sid];
|
| 713 |
+
else
|
| 714 |
+
input = inputSentences[*sid];
|
| 715 |
+
|
| 716 |
+
Moses::Sentence *sentence = new Sentence();
|
| 717 |
+
stringstream in(input + "\n");
|
| 718 |
+
const vector<FactorType> inputFactorOrder = staticData.GetInputFactorOrder();
|
| 719 |
+
sentence->Read(in,inputFactorOrder);
|
| 720 |
+
cerr << "\nRank " << rank << ", epoch " << epoch << ", input sentence " << *sid << ": \"";
|
| 721 |
+
sentence->Print(cerr);
|
| 722 |
+
cerr << "\"" << " (batch pos " << batchPosition << ")" << endl;
|
| 723 |
+
size_t current_input_length = (*sentence).GetSize();
|
| 724 |
+
|
| 725 |
+
if (epoch == 0 && (scaleByAvgInputLength || scaleByAvgInverseLength)) {
|
| 726 |
+
sumOfInputs += current_input_length;
|
| 727 |
+
++numberOfInputs;
|
| 728 |
+
avgInputLength = sumOfInputs/numberOfInputs;
|
| 729 |
+
decoder->setAvgInputLength(avgInputLength);
|
| 730 |
+
cerr << "Rank " << rank << ", epoch 0, average input length: " << avgInputLength << endl;
|
| 731 |
+
}
|
| 732 |
+
|
| 733 |
+
vector<ScoreComponentCollection> newFeatureValues;
|
| 734 |
+
vector<float> newScores;
|
| 735 |
+
if (model_hope_fear) {
|
| 736 |
+
featureValues.push_back(newFeatureValues);
|
| 737 |
+
bleuScores.push_back(newScores);
|
| 738 |
+
modelScores.push_back(newScores);
|
| 739 |
+
}
|
| 740 |
+
if (hope_fear || perceptron_update) {
|
| 741 |
+
featureValuesHope.push_back(newFeatureValues);
|
| 742 |
+
featureValuesFear.push_back(newFeatureValues);
|
| 743 |
+
bleuScoresHope.push_back(newScores);
|
| 744 |
+
bleuScoresFear.push_back(newScores);
|
| 745 |
+
modelScoresHope.push_back(newScores);
|
| 746 |
+
modelScoresFear.push_back(newScores);
|
| 747 |
+
if (historyBleu || simpleHistoryBleu || debug_model) {
|
| 748 |
+
featureValues.push_back(newFeatureValues);
|
| 749 |
+
bleuScores.push_back(newScores);
|
| 750 |
+
modelScores.push_back(newScores);
|
| 751 |
+
}
|
| 752 |
+
}
|
| 753 |
+
if (kbest) {
|
| 754 |
+
// for decoding
|
| 755 |
+
featureValues.push_back(newFeatureValues);
|
| 756 |
+
bleuScores.push_back(newScores);
|
| 757 |
+
modelScores.push_back(newScores);
|
| 758 |
+
|
| 759 |
+
// for storing selected examples
|
| 760 |
+
featureValuesHope.push_back(newFeatureValues);
|
| 761 |
+
featureValuesFear.push_back(newFeatureValues);
|
| 762 |
+
bleuScoresHope.push_back(newScores);
|
| 763 |
+
bleuScoresFear.push_back(newScores);
|
| 764 |
+
modelScoresHope.push_back(newScores);
|
| 765 |
+
modelScoresFear.push_back(newScores);
|
| 766 |
+
}
|
| 767 |
+
|
| 768 |
+
size_t ref_length;
|
| 769 |
+
float avg_ref_length;
|
| 770 |
+
|
| 771 |
+
if (print_weights)
|
| 772 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: " << mosesWeights << endl;
|
| 773 |
+
if (print_core_weights) {
|
| 774 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", current weights: ";
|
| 775 |
+
mosesWeights.PrintCoreFeatures();
|
| 776 |
+
cerr << endl;
|
| 777 |
+
}
|
| 778 |
+
|
| 779 |
+
// check LM weight
|
| 780 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 781 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 782 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 783 |
+
const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
|
| 784 |
+
|
| 785 |
+
if (lm) {
|
| 786 |
+
float lmWeight = mosesWeights.GetScoreForProducer(lm);
|
| 787 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", lm weight: " << lmWeight << endl;
|
| 788 |
+
if (lmWeight <= 0) {
|
| 789 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: language model weight should never be <= 0." << endl;
|
| 790 |
+
mosesWeights.Assign(lm, 0.1);
|
| 791 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", assign lm weights of 0.1" << endl;
|
| 792 |
+
}
|
| 793 |
+
}
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
// select inference scheme
|
| 797 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", real Bleu? " << realBleu << endl;
|
| 798 |
+
if (hope_fear || perceptron_update) {
|
| 799 |
+
// HOPE
|
| 800 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << hope_n <<
|
| 801 |
+
"best hope translations" << endl;
|
| 802 |
+
vector< vector<const Word*> > outputHope = decoder->getNBest(input, *sid, hope_n, 1.0, bleuWeight_hope,
|
| 803 |
+
featureValuesHope[batchPosition], bleuScoresHope[batchPosition], modelScoresHope[batchPosition],
|
| 804 |
+
1, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 805 |
+
vector<const Word*> oracle = outputHope[0];
|
| 806 |
+
decoder->cleanup(chartDecoding);
|
| 807 |
+
ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
|
| 808 |
+
avg_ref_length = ref_length;
|
| 809 |
+
float hope_length_ratio = (float)oracle.size()/ref_length;
|
| 810 |
+
cerr << endl;
|
| 811 |
+
|
| 812 |
+
// count sparse features occurring in hope translation
|
| 813 |
+
featureValuesHope[batchPosition][0].IncrementSparseHopeFeatures();
|
| 814 |
+
|
| 815 |
+
vector<const Word*> bestModel;
|
| 816 |
+
if (debug_model || historyBleu || simpleHistoryBleu) {
|
| 817 |
+
// MODEL (for updating the history only, using dummy vectors)
|
| 818 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", 1best wrt model score (debug or history)" << endl;
|
| 819 |
+
vector< vector<const Word*> > outputModel = decoder->getNBest(input, *sid, n, 0.0, bleuWeight,
|
| 820 |
+
featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition],
|
| 821 |
+
1, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 822 |
+
bestModel = outputModel[0];
|
| 823 |
+
decoder->cleanup(chartDecoding);
|
| 824 |
+
cerr << endl;
|
| 825 |
+
ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
|
| 826 |
+
}
|
| 827 |
+
|
| 828 |
+
// FEAR
|
| 829 |
+
//float fear_length_ratio = 0;
|
| 830 |
+
float bleuRatioHopeFear = 0;
|
| 831 |
+
//int fearSize = 0;
|
| 832 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << fear_n << "best fear translations" << endl;
|
| 833 |
+
vector< vector<const Word*> > outputFear = decoder->getNBest(input, *sid, fear_n, -1.0, bleuWeight_fear,
|
| 834 |
+
featureValuesFear[batchPosition], bleuScoresFear[batchPosition], modelScoresFear[batchPosition],
|
| 835 |
+
1, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 836 |
+
vector<const Word*> fear = outputFear[0];
|
| 837 |
+
decoder->cleanup(chartDecoding);
|
| 838 |
+
ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
|
| 839 |
+
avg_ref_length += ref_length;
|
| 840 |
+
avg_ref_length /= 2;
|
| 841 |
+
//fear_length_ratio = (float)fear.size()/ref_length;
|
| 842 |
+
//fearSize = (int)fear.size();
|
| 843 |
+
cerr << endl;
|
| 844 |
+
for (size_t i = 0; i < fear.size(); ++i)
|
| 845 |
+
delete fear[i];
|
| 846 |
+
|
| 847 |
+
// count sparse features occurring in fear translation
|
| 848 |
+
featureValuesFear[batchPosition][0].IncrementSparseFearFeatures();
|
| 849 |
+
|
| 850 |
+
// Bleu-related example selection
|
| 851 |
+
bool skip = false;
|
| 852 |
+
bleuRatioHopeFear = bleuScoresHope[batchPosition][0] / bleuScoresFear[batchPosition][0];
|
| 853 |
+
if (minBleuRatio != -1 && bleuRatioHopeFear < minBleuRatio)
|
| 854 |
+
skip = true;
|
| 855 |
+
if(maxBleuRatio != -1 && bleuRatioHopeFear > maxBleuRatio)
|
| 856 |
+
skip = true;
|
| 857 |
+
|
| 858 |
+
// sanity check
|
| 859 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 860 |
+
if (bleuScores[batchPosition][0] > bleuScoresHope[batchPosition][0] &&
|
| 861 |
+
modelScores[batchPosition][0] > modelScoresHope[batchPosition][0]) {
|
| 862 |
+
if (abs(bleuScores[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon &&
|
| 863 |
+
abs(modelScores[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) {
|
| 864 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: MODEL translation better than HOPE translation." << endl;
|
| 865 |
+
skip = true;
|
| 866 |
+
}
|
| 867 |
+
}
|
| 868 |
+
if (bleuScoresFear[batchPosition][0] > bleuScores[batchPosition][0] &&
|
| 869 |
+
modelScoresFear[batchPosition][0] > modelScores[batchPosition][0]) {
|
| 870 |
+
if (abs(bleuScoresFear[batchPosition][0] - bleuScores[batchPosition][0]) > epsilon &&
|
| 871 |
+
abs(modelScoresFear[batchPosition][0] - modelScores[batchPosition][0]) > epsilon) {
|
| 872 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than MODEL translation." << endl;
|
| 873 |
+
skip = true;
|
| 874 |
+
}
|
| 875 |
+
}
|
| 876 |
+
}
|
| 877 |
+
if (bleuScoresFear[batchPosition][0] > bleuScoresHope[batchPosition][0]) {
|
| 878 |
+
if (abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) > epsilon) {
|
| 879 |
+
// check if it's an error or a warning
|
| 880 |
+
skip = true;
|
| 881 |
+
if (modelScoresFear[batchPosition][0] > modelScoresHope[batchPosition][0] && abs(modelScoresFear[batchPosition][0] - modelScoresHope[batchPosition][0]) > epsilon) {
|
| 882 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", ERROR: FEAR translation better than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <<endl;
|
| 883 |
+
} else {
|
| 884 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", WARNING: FEAR translation has better Bleu than HOPE translation. (abs-diff: " << abs(bleuScoresFear[batchPosition][0] - bleuScoresHope[batchPosition][0]) << ")" <<endl;
|
| 885 |
+
}
|
| 886 |
+
}
|
| 887 |
+
}
|
| 888 |
+
|
| 889 |
+
if (skip) {
|
| 890 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", skip example (" << hope_length_ratio << ", " << bleuRatioHopeFear << ").. " << endl;
|
| 891 |
+
featureValuesHope[batchPosition].clear();
|
| 892 |
+
featureValuesFear[batchPosition].clear();
|
| 893 |
+
bleuScoresHope[batchPosition].clear();
|
| 894 |
+
bleuScoresFear[batchPosition].clear();
|
| 895 |
+
if (historyBleu || simpleHistoryBleu || debug_model) {
|
| 896 |
+
featureValues[batchPosition].clear();
|
| 897 |
+
bleuScores[batchPosition].clear();
|
| 898 |
+
}
|
| 899 |
+
} else {
|
| 900 |
+
examples_in_batch++;
|
| 901 |
+
|
| 902 |
+
// needed for history
|
| 903 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 904 |
+
inputLengths.push_back(current_input_length);
|
| 905 |
+
ref_ids.push_back(*sid);
|
| 906 |
+
oneBests.push_back(bestModel);
|
| 907 |
+
}
|
| 908 |
+
}
|
| 909 |
+
}
|
| 910 |
+
if (model_hope_fear) {
|
| 911 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best hope translations" << endl;
|
| 912 |
+
size_t oraclePos = featureValues[batchPosition].size();
|
| 913 |
+
decoder->getNBest(input, *sid, n, 1.0, bleuWeight_hope,
|
| 914 |
+
featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition],
|
| 915 |
+
0, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 916 |
+
//vector<const Word*> oracle = outputHope[0];
|
| 917 |
+
// needed for history
|
| 918 |
+
inputLengths.push_back(current_input_length);
|
| 919 |
+
ref_ids.push_back(*sid);
|
| 920 |
+
decoder->cleanup(chartDecoding);
|
| 921 |
+
//ref_length = decoder->getClosestReferenceLength(*sid, oracle.size());
|
| 922 |
+
//float hope_length_ratio = (float)oracle.size()/ref_length;
|
| 923 |
+
cerr << endl;
|
| 924 |
+
|
| 925 |
+
oracleFeatureValues.push_back(featureValues[batchPosition][oraclePos]);
|
| 926 |
+
oracleBleuScores.push_back(bleuScores[batchPosition][oraclePos]);
|
| 927 |
+
oracleModelScores.push_back(modelScores[batchPosition][oraclePos]);
|
| 928 |
+
|
| 929 |
+
// MODEL
|
| 930 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl;
|
| 931 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 932 |
+
vector< vector<const Word*> > outputModel = decoder->getNBest(input, *sid, n, 0.0,
|
| 933 |
+
bleuWeight, featureValues[batchPosition], bleuScores[batchPosition],
|
| 934 |
+
modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 935 |
+
vector<const Word*> bestModel = outputModel[0];
|
| 936 |
+
oneBests.push_back(bestModel);
|
| 937 |
+
inputLengths.push_back(current_input_length);
|
| 938 |
+
ref_ids.push_back(*sid);
|
| 939 |
+
} else {
|
| 940 |
+
decoder->getNBest(input, *sid, n, 0.0, bleuWeight,
|
| 941 |
+
featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition],
|
| 942 |
+
0, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 943 |
+
}
|
| 944 |
+
decoder->cleanup(chartDecoding);
|
| 945 |
+
//ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
|
| 946 |
+
//float model_length_ratio = (float)bestModel.size()/ref_length;
|
| 947 |
+
cerr << endl;
|
| 948 |
+
|
| 949 |
+
// FEAR
|
| 950 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best fear translations" << endl;
|
| 951 |
+
decoder->getNBest(input, *sid, n, -1.0, bleuWeight_fear,
|
| 952 |
+
featureValues[batchPosition], bleuScores[batchPosition], modelScores[batchPosition],
|
| 953 |
+
0, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 954 |
+
decoder->cleanup(chartDecoding);
|
| 955 |
+
//ref_length = decoder->getClosestReferenceLength(*sid, fear.size());
|
| 956 |
+
//float fear_length_ratio = (float)fear.size()/ref_length;
|
| 957 |
+
|
| 958 |
+
examples_in_batch++;
|
| 959 |
+
}
|
| 960 |
+
if (kbest) {
|
| 961 |
+
// MODEL
|
| 962 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", " << n << "best wrt model score" << endl;
|
| 963 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 964 |
+
vector< vector<const Word*> > outputModel = decoder->getNBest(input, *sid, n, 0.0,
|
| 965 |
+
bleuWeight, featureValues[batchPosition], bleuScores[batchPosition],
|
| 966 |
+
modelScores[batchPosition], 1, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 967 |
+
vector<const Word*> bestModel = outputModel[0];
|
| 968 |
+
oneBests.push_back(bestModel);
|
| 969 |
+
inputLengths.push_back(current_input_length);
|
| 970 |
+
ref_ids.push_back(*sid);
|
| 971 |
+
} else {
|
| 972 |
+
decoder->getNBest(input, *sid, n, 0.0, bleuWeight,
|
| 973 |
+
featureValues[batchPosition], bleuScores[batchPosition],
|
| 974 |
+
modelScores[batchPosition], 0, realBleu, distinctNbest, avgRefLength, rank, epoch, "");
|
| 975 |
+
}
|
| 976 |
+
decoder->cleanup(chartDecoding);
|
| 977 |
+
//ref_length = decoder->getClosestReferenceLength(*sid, bestModel.size());
|
| 978 |
+
//float model_length_ratio = (float)bestModel.size()/ref_length;
|
| 979 |
+
cerr << endl;
|
| 980 |
+
|
| 981 |
+
examples_in_batch++;
|
| 982 |
+
|
| 983 |
+
HypothesisQueue queueHope(hope_n);
|
| 984 |
+
HypothesisQueue queueFear(fear_n);
|
| 985 |
+
cerr << endl;
|
| 986 |
+
if (most_violated || all_violated) {
|
| 987 |
+
float bleuHope = -1000;
|
| 988 |
+
float bleuFear = 1000;
|
| 989 |
+
int indexHope = -1;
|
| 990 |
+
int indexFear = -1;
|
| 991 |
+
|
| 992 |
+
vector<float> bleuHopeList;
|
| 993 |
+
vector<float> bleuFearList;
|
| 994 |
+
vector<float> indexHopeList;
|
| 995 |
+
vector<float> indexFearList;
|
| 996 |
+
|
| 997 |
+
if (most_violated)
|
| 998 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with most violated constraint" << endl;
|
| 999 |
+
else if (all_violated)
|
| 1000 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with violated constraints";
|
| 1001 |
+
else
|
| 1002 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", pick all pairs with hope";
|
| 1003 |
+
|
| 1004 |
+
// find best hope, then find fear that violates our constraint most
|
| 1005 |
+
for (size_t i=0; i<bleuScores[batchPosition].size(); ++i) {
|
| 1006 |
+
if (abs(bleuScores[batchPosition][i] - bleuHope) < epsilon) { // equal bleu scores
|
| 1007 |
+
if (modelScores[batchPosition][i] > modelScores[batchPosition][indexHope]) {
|
| 1008 |
+
if (abs(modelScores[batchPosition][i] - modelScores[batchPosition][indexHope]) > epsilon) {
|
| 1009 |
+
// better model score
|
| 1010 |
+
bleuHope = bleuScores[batchPosition][i];
|
| 1011 |
+
indexHope = i;
|
| 1012 |
+
}
|
| 1013 |
+
}
|
| 1014 |
+
} else if (bleuScores[batchPosition][i] > bleuHope) { // better than current best
|
| 1015 |
+
bleuHope = bleuScores[batchPosition][i];
|
| 1016 |
+
indexHope = i;
|
| 1017 |
+
}
|
| 1018 |
+
}
|
| 1019 |
+
|
| 1020 |
+
float currentViolation = 0;
|
| 1021 |
+
for (size_t i=0; i<bleuScores[batchPosition].size(); ++i) {
|
| 1022 |
+
float bleuDiff = bleuHope - bleuScores[batchPosition][i];
|
| 1023 |
+
float modelDiff = modelScores[batchPosition][indexHope] - modelScores[batchPosition][i];
|
| 1024 |
+
if ((bleuDiff > epsilon) && (modelDiff < bleuDiff)) {
|
| 1025 |
+
float diff = bleuDiff - modelDiff;
|
| 1026 |
+
if (diff > epsilon) {
|
| 1027 |
+
if (all_violated) {
|
| 1028 |
+
cerr << ".. adding pair";
|
| 1029 |
+
bleuHopeList.push_back(bleuHope);
|
| 1030 |
+
bleuFearList.push_back(bleuScores[batchPosition][i]);
|
| 1031 |
+
indexHopeList.push_back(indexHope);
|
| 1032 |
+
indexFearList.push_back(i);
|
| 1033 |
+
} else if (most_violated && diff > currentViolation) {
|
| 1034 |
+
currentViolation = diff;
|
| 1035 |
+
bleuFear = bleuScores[batchPosition][i];
|
| 1036 |
+
indexFear = i;
|
| 1037 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", current violation: " << currentViolation << " (" << modelDiff << " >= " << bleuDiff << ")" << endl;
|
| 1038 |
+
}
|
| 1039 |
+
}
|
| 1040 |
+
}
|
| 1041 |
+
}
|
| 1042 |
+
|
| 1043 |
+
if (most_violated) {
|
| 1044 |
+
if (currentViolation > 0) {
|
| 1045 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", adding pair with violation " << currentViolation << endl;
|
| 1046 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << bleuHope << " (" << indexHope << "), fear: " << bleuFear << " (" << indexFear << ")" << endl;
|
| 1047 |
+
bleuScoresHope[batchPosition].push_back(bleuHope);
|
| 1048 |
+
bleuScoresFear[batchPosition].push_back(bleuFear);
|
| 1049 |
+
featureValuesHope[batchPosition].push_back(featureValues[batchPosition][indexHope]);
|
| 1050 |
+
featureValuesFear[batchPosition].push_back(featureValues[batchPosition][indexFear]);
|
| 1051 |
+
float modelScoreHope = modelScores[batchPosition][indexHope];
|
| 1052 |
+
float modelScoreFear = modelScores[batchPosition][indexFear];
|
| 1053 |
+
if (most_violated_reg) {
|
| 1054 |
+
// reduce model score difference by factor ~0.5
|
| 1055 |
+
float reg = currentViolation/4;
|
| 1056 |
+
modelScoreHope += abs(reg);
|
| 1057 |
+
modelScoreFear -= abs(reg);
|
| 1058 |
+
float newViolation = (bleuHope - bleuFear) - (modelScoreHope - modelScoreFear);
|
| 1059 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", regularized violation: " << newViolation << endl;
|
| 1060 |
+
}
|
| 1061 |
+
modelScoresHope[batchPosition].push_back(modelScoreHope);
|
| 1062 |
+
modelScoresFear[batchPosition].push_back(modelScoreFear);
|
| 1063 |
+
|
| 1064 |
+
featureValues[batchPosition][indexHope].IncrementSparseHopeFeatures();
|
| 1065 |
+
featureValues[batchPosition][indexFear].IncrementSparseFearFeatures();
|
| 1066 |
+
} else {
|
| 1067 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", no violated constraint found." << endl;
|
| 1068 |
+
skip_example = 1;
|
| 1069 |
+
}
|
| 1070 |
+
} else cerr << endl;
|
| 1071 |
+
}
|
| 1072 |
+
if (max_bleu_diff) {
|
| 1073 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", pick pair with max Bleu diff from list: " << bleuScores[batchPosition].size() << endl;
|
| 1074 |
+
for (size_t i=0; i<bleuScores[batchPosition].size(); ++i) {
|
| 1075 |
+
float hopeScore = bleuScores[batchPosition][i];
|
| 1076 |
+
if (modelPlusBleu) hopeScore += modelScores[batchPosition][i];
|
| 1077 |
+
BleuIndexPair hope(hopeScore, i);
|
| 1078 |
+
queueHope.Push(hope);
|
| 1079 |
+
|
| 1080 |
+
float fearScore = -1*(bleuScores[batchPosition][i]);
|
| 1081 |
+
if (modelPlusBleu) fearScore += modelScores[batchPosition][i];
|
| 1082 |
+
BleuIndexPair fear(fearScore, i);
|
| 1083 |
+
queueFear.Push(fear);
|
| 1084 |
+
}
|
| 1085 |
+
skip_example = 0;
|
| 1086 |
+
}
|
| 1087 |
+
cerr << endl;
|
| 1088 |
+
|
| 1089 |
+
vector<BleuIndexPair> hopeList, fearList;
|
| 1090 |
+
for (size_t i=0; i<hope_n && !queueHope.Empty(); ++i) hopeList.push_back(queueHope.Pop());
|
| 1091 |
+
for (size_t i=0; i<fear_n && !queueFear.Empty(); ++i) fearList.push_back(queueFear.Pop());
|
| 1092 |
+
for (size_t i=0; i<hopeList.size(); ++i) {
|
| 1093 |
+
//float bleuHope = hopeList[i].first;
|
| 1094 |
+
size_t indexHope = hopeList[i].second;
|
| 1095 |
+
float bleuHope = bleuScores[batchPosition][indexHope];
|
| 1096 |
+
for (size_t j=0; j<fearList.size(); ++j) {
|
| 1097 |
+
//float bleuFear = -1*(fearList[j].first);
|
| 1098 |
+
size_t indexFear = fearList[j].second;
|
| 1099 |
+
float bleuFear = bleuScores[batchPosition][indexFear];
|
| 1100 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << bleuHope << " (" << indexHope << "), fear: " << bleuFear << " (" << indexFear << ")" << endl;
|
| 1101 |
+
bleuScoresHope[batchPosition].push_back(bleuHope);
|
| 1102 |
+
bleuScoresFear[batchPosition].push_back(bleuFear);
|
| 1103 |
+
featureValuesHope[batchPosition].push_back(featureValues[batchPosition][indexHope]);
|
| 1104 |
+
featureValuesFear[batchPosition].push_back(featureValues[batchPosition][indexFear]);
|
| 1105 |
+
float modelScoreHope = modelScores[batchPosition][indexHope];
|
| 1106 |
+
float modelScoreFear = modelScores[batchPosition][indexFear];
|
| 1107 |
+
|
| 1108 |
+
modelScoresHope[batchPosition].push_back(modelScoreHope);
|
| 1109 |
+
modelScoresFear[batchPosition].push_back(modelScoreFear);
|
| 1110 |
+
|
| 1111 |
+
featureValues[batchPosition][indexHope].IncrementSparseHopeFeatures();
|
| 1112 |
+
featureValues[batchPosition][indexFear].IncrementSparseFearFeatures();
|
| 1113 |
+
}
|
| 1114 |
+
}
|
| 1115 |
+
if (!makePairs)
|
| 1116 |
+
cerr << "Rank " << rank << ", epoch " << epoch << "summing up hope and fear vectors, no pairs" << endl;
|
| 1117 |
+
}
|
| 1118 |
+
|
| 1119 |
+
// next input sentence
|
| 1120 |
+
++sid;
|
| 1121 |
+
++actualBatchSize;
|
| 1122 |
+
++shardPosition;
|
| 1123 |
+
} // end of batch loop
|
| 1124 |
+
|
| 1125 |
+
if (examples_in_batch == 0 || (kbest && skip_example)) {
|
| 1126 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", batch is empty." << endl;
|
| 1127 |
+
} else {
|
| 1128 |
+
vector<vector<float> > losses(actualBatchSize);
|
| 1129 |
+
if (model_hope_fear) {
|
| 1130 |
+
// Set loss for each sentence as BLEU(oracle) - BLEU(hypothesis)
|
| 1131 |
+
for (size_t batchPosition = 0; batchPosition < actualBatchSize; ++batchPosition) {
|
| 1132 |
+
for (size_t j = 0; j < bleuScores[batchPosition].size(); ++j) {
|
| 1133 |
+
losses[batchPosition].push_back(oracleBleuScores[batchPosition] - bleuScores[batchPosition][j]);
|
| 1134 |
+
}
|
| 1135 |
+
}
|
| 1136 |
+
}
|
| 1137 |
+
|
| 1138 |
+
// set weight for bleu feature to 0 before optimizing
|
| 1139 |
+
vector<FeatureFunction*>::const_iterator iter;
|
| 1140 |
+
const vector<FeatureFunction*> &featureFunctions2 = FeatureFunction::GetFeatureFunctions();
|
| 1141 |
+
for (iter = featureFunctions2.begin(); iter != featureFunctions2.end(); ++iter) {
|
| 1142 |
+
if ((*iter)->GetScoreProducerDescription() == "BleuScoreFeature") {
|
| 1143 |
+
mosesWeights.Assign(*iter, 0);
|
| 1144 |
+
break;
|
| 1145 |
+
}
|
| 1146 |
+
}
|
| 1147 |
+
|
| 1148 |
+
// scale LM feature (to avoid rapid changes)
|
| 1149 |
+
if (scale_lm) {
|
| 1150 |
+
cerr << "scale lm" << endl;
|
| 1151 |
+
const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
| 1152 |
+
for (size_t i = 0; i < statefulFFs.size(); ++i) {
|
| 1153 |
+
const StatefulFeatureFunction *ff = statefulFFs[i];
|
| 1154 |
+
const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
|
| 1155 |
+
|
| 1156 |
+
if (lm) {
|
| 1157 |
+
// scale down score
|
| 1158 |
+
if (model_hope_fear) {
|
| 1159 |
+
scaleFeatureScore(lm, scale_lm_factor, featureValues, rank, epoch);
|
| 1160 |
+
} else {
|
| 1161 |
+
scaleFeatureScore(lm, scale_lm_factor, featureValuesHope, rank, epoch);
|
| 1162 |
+
scaleFeatureScore(lm, scale_lm_factor, featureValuesFear, rank, epoch);
|
| 1163 |
+
}
|
| 1164 |
+
}
|
| 1165 |
+
}
|
| 1166 |
+
}
|
| 1167 |
+
|
| 1168 |
+
// scale WP
|
| 1169 |
+
if (scale_wp) {
|
| 1170 |
+
// scale up weight
|
| 1171 |
+
WordPenaltyProducer &wp = WordPenaltyProducer::InstanceNonConst();
|
| 1172 |
+
|
| 1173 |
+
// scale down score
|
| 1174 |
+
if (model_hope_fear) {
|
| 1175 |
+
scaleFeatureScore(&wp, scale_wp_factor, featureValues, rank, epoch);
|
| 1176 |
+
} else {
|
| 1177 |
+
scaleFeatureScore(&wp, scale_wp_factor, featureValuesHope, rank, epoch);
|
| 1178 |
+
scaleFeatureScore(&wp, scale_wp_factor, featureValuesFear, rank, epoch);
|
| 1179 |
+
}
|
| 1180 |
+
}
|
| 1181 |
+
|
| 1182 |
+
// print out the feature values
|
| 1183 |
+
if (print_feature_values) {
|
| 1184 |
+
cerr << "\nRank " << rank << ", epoch " << epoch << ", feature values: " << endl;
|
| 1185 |
+
if (model_hope_fear) printFeatureValues(featureValues);
|
| 1186 |
+
else {
|
| 1187 |
+
cerr << "hope: " << endl;
|
| 1188 |
+
printFeatureValues(featureValuesHope);
|
| 1189 |
+
cerr << "fear: " << endl;
|
| 1190 |
+
printFeatureValues(featureValuesFear);
|
| 1191 |
+
}
|
| 1192 |
+
}
|
| 1193 |
+
|
| 1194 |
+
// apply learning rates to feature vectors before optimization
|
| 1195 |
+
if (feature_confidence) {
|
| 1196 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", apply feature learning rates with decays " << decay_core << "/" << decay_sparse << ": " << featureLearningRates << endl;
|
| 1197 |
+
if (model_hope_fear) {
|
| 1198 |
+
applyPerFeatureLearningRates(featureValues, featureLearningRates, sparse_r0);
|
| 1199 |
+
} else {
|
| 1200 |
+
applyPerFeatureLearningRates(featureValuesHope, featureLearningRates, sparse_r0);
|
| 1201 |
+
applyPerFeatureLearningRates(featureValuesFear, featureLearningRates, sparse_r0);
|
| 1202 |
+
}
|
| 1203 |
+
} else {
|
| 1204 |
+
// apply fixed learning rates
|
| 1205 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", apply fixed learning rates, core: " << core_r0 << ", sparse: " << sparse_r0 << endl;
|
| 1206 |
+
if (core_r0 != 1.0 || sparse_r0 != 1.0) {
|
| 1207 |
+
if (model_hope_fear) {
|
| 1208 |
+
applyLearningRates(featureValues, core_r0, sparse_r0);
|
| 1209 |
+
} else {
|
| 1210 |
+
applyLearningRates(featureValuesHope, core_r0, sparse_r0);
|
| 1211 |
+
applyLearningRates(featureValuesFear, core_r0, sparse_r0);
|
| 1212 |
+
}
|
| 1213 |
+
}
|
| 1214 |
+
}
|
| 1215 |
+
|
| 1216 |
+
// Run optimiser on batch:
|
| 1217 |
+
VERBOSE(1, "\nRank " << rank << ", epoch " << epoch << ", run optimiser:" << endl);
|
| 1218 |
+
size_t update_status = 1;
|
| 1219 |
+
ScoreComponentCollection weightUpdate;
|
| 1220 |
+
if (perceptron_update) {
|
| 1221 |
+
vector<vector<float> > dummy1;
|
| 1222 |
+
update_status = optimiser->updateWeightsHopeFear( weightUpdate, featureValuesHope,
|
| 1223 |
+
featureValuesFear, dummy1, dummy1, dummy1, dummy1, learning_rate, rank, epoch);
|
| 1224 |
+
} else if (hope_fear) {
|
| 1225 |
+
if (bleuScoresHope[0][0] >= min_oracle_bleu) {
|
| 1226 |
+
if (hope_n == 1 && fear_n ==1 && batchSize == 1 && !hildreth) {
|
| 1227 |
+
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(weightUpdate,
|
| 1228 |
+
featureValuesHope[0][0], featureValuesFear[0][0], bleuScoresHope[0][0],
|
| 1229 |
+
bleuScoresFear[0][0], modelScoresHope[0][0], modelScoresFear[0][0], learning_rate, rank, epoch);
|
| 1230 |
+
} else
|
| 1231 |
+
update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope,
|
| 1232 |
+
featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope,
|
| 1233 |
+
modelScoresFear, learning_rate, rank, epoch);
|
| 1234 |
+
} else
|
| 1235 |
+
update_status = 1;
|
| 1236 |
+
} else if (kbest) {
|
| 1237 |
+
if (batchSize == 1 && featureValuesHope[0].size() == 1 && !hildreth) {
|
| 1238 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl;
|
| 1239 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl;
|
| 1240 |
+
update_status = ((MiraOptimiser*) optimiser)->updateWeightsAnalytically(
|
| 1241 |
+
weightUpdate, featureValuesHope[0][0], featureValuesFear[0][0],
|
| 1242 |
+
bleuScoresHope[0][0], bleuScoresFear[0][0], modelScoresHope[0][0],
|
| 1243 |
+
modelScoresFear[0][0], learning_rate, rank, epoch);
|
| 1244 |
+
} else {
|
| 1245 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", model score hope: " << modelScoresHope[0][0] << endl;
|
| 1246 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", model score fear: " << modelScoresFear[0][0] << endl;
|
| 1247 |
+
update_status = optimiser->updateWeightsHopeFear(weightUpdate, featureValuesHope,
|
| 1248 |
+
featureValuesFear, bleuScoresHope, bleuScoresFear, modelScoresHope,
|
| 1249 |
+
modelScoresFear, learning_rate, rank, epoch);
|
| 1250 |
+
}
|
| 1251 |
+
} else {
|
| 1252 |
+
// model_hope_fear
|
| 1253 |
+
update_status = ((MiraOptimiser*) optimiser)->updateWeights(weightUpdate,
|
| 1254 |
+
featureValues, losses, bleuScores, modelScores, oracleFeatureValues,
|
| 1255 |
+
oracleBleuScores, oracleModelScores, learning_rate, rank, epoch);
|
| 1256 |
+
}
|
| 1257 |
+
|
| 1258 |
+
// sumStillViolatedConstraints += update_status;
|
| 1259 |
+
|
| 1260 |
+
if (update_status == 0) { // if weights were updated
|
| 1261 |
+
// apply weight update
|
| 1262 |
+
if (debug)
|
| 1263 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << weightUpdate << endl;
|
| 1264 |
+
|
| 1265 |
+
if (feature_confidence) {
|
| 1266 |
+
// update confidence counts based on weight update
|
| 1267 |
+
confidenceCounts.UpdateConfidenceCounts(weightUpdate, signed_counts);
|
| 1268 |
+
|
| 1269 |
+
// update feature learning rates
|
| 1270 |
+
featureLearningRates.UpdateLearningRates(decay_core, decay_sparse, confidenceCounts, core_r0, sparse_r0);
|
| 1271 |
+
}
|
| 1272 |
+
|
| 1273 |
+
// apply weight update to Moses weights
|
| 1274 |
+
mosesWeights.PlusEquals(weightUpdate);
|
| 1275 |
+
|
| 1276 |
+
if (normaliseWeights)
|
| 1277 |
+
mosesWeights.L1Normalise();
|
| 1278 |
+
|
| 1279 |
+
cumulativeWeights.PlusEquals(mosesWeights);
|
| 1280 |
+
if (sparseAverage) {
|
| 1281 |
+
ScoreComponentCollection binary;
|
| 1282 |
+
binary.SetToBinaryOf(mosesWeights);
|
| 1283 |
+
cumulativeWeightsBinary.PlusEquals(binary);
|
| 1284 |
+
}
|
| 1285 |
+
|
| 1286 |
+
++numberOfUpdates;
|
| 1287 |
+
++numberOfUpdatesThisEpoch;
|
| 1288 |
+
if (averageWeights) {
|
| 1289 |
+
ScoreComponentCollection averageWeights(cumulativeWeights);
|
| 1290 |
+
if (accumulateWeights) {
|
| 1291 |
+
averageWeights.DivideEquals(numberOfUpdates);
|
| 1292 |
+
} else {
|
| 1293 |
+
averageWeights.DivideEquals(numberOfUpdatesThisEpoch);
|
| 1294 |
+
}
|
| 1295 |
+
|
| 1296 |
+
mosesWeights = averageWeights;
|
| 1297 |
+
}
|
| 1298 |
+
|
| 1299 |
+
// set new Moses weights
|
| 1300 |
+
decoder->setWeights(mosesWeights);
|
| 1301 |
+
//cerr << "Rank " << rank << ", epoch " << epoch << ", new weights: " << mosesWeights << endl;
|
| 1302 |
+
}
|
| 1303 |
+
|
| 1304 |
+
// update history (for approximate document Bleu)
|
| 1305 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 1306 |
+
for (size_t i = 0; i < oneBests.size(); ++i)
|
| 1307 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", update history with 1best length: " << oneBests[i].size() << " ";
|
| 1308 |
+
decoder->updateHistory(oneBests, inputLengths, ref_ids, rank, epoch);
|
| 1309 |
+
deleteTranslations(oneBests);
|
| 1310 |
+
}
|
| 1311 |
+
} // END TRANSLATE AND UPDATE BATCH
|
| 1312 |
+
|
| 1313 |
+
// size of all shards except for the last one
|
| 1314 |
+
size_t generalShardSize;
|
| 1315 |
+
if (trainWithMultipleFolds)
|
| 1316 |
+
generalShardSize = order.size()/coresPerFold;
|
| 1317 |
+
else
|
| 1318 |
+
generalShardSize = order.size()/size;
|
| 1319 |
+
|
| 1320 |
+
size_t mixing_base = mixingFrequency == 0 ? 0 : generalShardSize / mixingFrequency;
|
| 1321 |
+
size_t dumping_base = weightDumpFrequency == 0 ? 0 : generalShardSize / weightDumpFrequency;
|
| 1322 |
+
bool mix = evaluateModulo(shardPosition, mixing_base, actualBatchSize);
|
| 1323 |
+
|
| 1324 |
+
// mix weights?
|
| 1325 |
+
if (mix) {
|
| 1326 |
+
#ifdef MPI_ENABLE
|
| 1327 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", mixing weights.. " << endl;
|
| 1328 |
+
// collect all weights in mixedWeights and divide by number of processes
|
| 1329 |
+
mpi::reduce(world, mosesWeights, mixedWeights, SCCPlus(), 0);
|
| 1330 |
+
|
| 1331 |
+
// mix confidence counts
|
| 1332 |
+
//mpi::reduce(world, confidenceCounts, mixedConfidenceCounts, SCCPlus(), 0);
|
| 1333 |
+
ScoreComponentCollection totalBinary;
|
| 1334 |
+
if (sparseAverage) {
|
| 1335 |
+
ScoreComponentCollection binary;
|
| 1336 |
+
binary.SetToBinaryOf(mosesWeights);
|
| 1337 |
+
mpi::reduce(world, binary, totalBinary, SCCPlus(), 0);
|
| 1338 |
+
}
|
| 1339 |
+
if (rank == 0) {
|
| 1340 |
+
// divide by number of processes
|
| 1341 |
+
if (sparseNoAverage)
|
| 1342 |
+
mixedWeights.CoreDivideEquals(size); // average only core weights
|
| 1343 |
+
else if (sparseAverage)
|
| 1344 |
+
mixedWeights.DivideEquals(totalBinary);
|
| 1345 |
+
else
|
| 1346 |
+
mixedWeights.DivideEquals(size);
|
| 1347 |
+
|
| 1348 |
+
// divide confidence counts
|
| 1349 |
+
//mixedConfidenceCounts.DivideEquals(size);
|
| 1350 |
+
|
| 1351 |
+
// normalise weights after averaging
|
| 1352 |
+
if (normaliseWeights) {
|
| 1353 |
+
mixedWeights.L1Normalise();
|
| 1354 |
+
}
|
| 1355 |
+
|
| 1356 |
+
++weightMixingThisEpoch;
|
| 1357 |
+
|
| 1358 |
+
if (pruneZeroWeights) {
|
| 1359 |
+
size_t pruned = mixedWeights.PruneZeroWeightFeatures();
|
| 1360 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1361 |
+
<< pruned << " zero-weighted features pruned from mixedWeights." << endl;
|
| 1362 |
+
|
| 1363 |
+
pruned = cumulativeWeights.PruneZeroWeightFeatures();
|
| 1364 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1365 |
+
<< pruned << " zero-weighted features pruned from cumulativeWeights." << endl;
|
| 1366 |
+
}
|
| 1367 |
+
|
| 1368 |
+
if (featureCutoff != -1 && weightMixingThisEpoch == mixingFrequency) {
|
| 1369 |
+
size_t pruned = mixedWeights.PruneSparseFeatures(featureCutoff);
|
| 1370 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1371 |
+
<< pruned << " features pruned from mixedWeights." << endl;
|
| 1372 |
+
|
| 1373 |
+
pruned = cumulativeWeights.PruneSparseFeatures(featureCutoff);
|
| 1374 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1375 |
+
<< pruned << " features pruned from cumulativeWeights." << endl;
|
| 1376 |
+
}
|
| 1377 |
+
|
| 1378 |
+
if (weightMixingThisEpoch == mixingFrequency || reg_on_every_mix) {
|
| 1379 |
+
if (l1_regularize) {
|
| 1380 |
+
size_t pruned;
|
| 1381 |
+
if (l1_reg_sparse)
|
| 1382 |
+
pruned = mixedWeights.SparseL1Regularize(l1_lambda);
|
| 1383 |
+
else
|
| 1384 |
+
pruned = mixedWeights.L1Regularize(l1_lambda);
|
| 1385 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1386 |
+
<< "l1-reg. on mixedWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl;
|
| 1387 |
+
}
|
| 1388 |
+
if (l2_regularize) {
|
| 1389 |
+
if (l2_reg_sparse)
|
| 1390 |
+
mixedWeights.SparseL2Regularize(l2_lambda);
|
| 1391 |
+
else
|
| 1392 |
+
mixedWeights.L2Regularize(l2_lambda);
|
| 1393 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1394 |
+
<< "l2-reg. on mixedWeights with lambda=" << l2_lambda << endl;
|
| 1395 |
+
}
|
| 1396 |
+
}
|
| 1397 |
+
}
|
| 1398 |
+
|
| 1399 |
+
// broadcast average weights from process 0
|
| 1400 |
+
mpi::broadcast(world, mixedWeights, 0);
|
| 1401 |
+
decoder->setWeights(mixedWeights);
|
| 1402 |
+
mosesWeights = mixedWeights;
|
| 1403 |
+
|
| 1404 |
+
// broadcast summed confidence counts
|
| 1405 |
+
//mpi::broadcast(world, mixedConfidenceCounts, 0);
|
| 1406 |
+
//confidenceCounts = mixedConfidenceCounts;
|
| 1407 |
+
#endif
|
| 1408 |
+
#ifndef MPI_ENABLE
|
| 1409 |
+
//cerr << "\nRank " << rank << ", no mixing, weights: " << mosesWeights << endl;
|
| 1410 |
+
mixedWeights = mosesWeights;
|
| 1411 |
+
#endif
|
| 1412 |
+
} // end mixing
|
| 1413 |
+
|
| 1414 |
+
// Dump weights?
|
| 1415 |
+
if (trainWithMultipleFolds || weightEpochDump == weightDumpFrequency) {
|
| 1416 |
+
// dump mixed weights at end of every epoch to enable continuing a crashed experiment
|
| 1417 |
+
// (for jackknife every time the weights are mixed)
|
| 1418 |
+
ostringstream filename;
|
| 1419 |
+
if (epoch < 10)
|
| 1420 |
+
filename << weightDumpStem << "_mixed_0" << epoch;
|
| 1421 |
+
else
|
| 1422 |
+
filename << weightDumpStem << "_mixed_" << epoch;
|
| 1423 |
+
|
| 1424 |
+
if (weightDumpFrequency > 1)
|
| 1425 |
+
filename << "_" << weightEpochDump;
|
| 1426 |
+
|
| 1427 |
+
mixedWeights.Save(filename.str());
|
| 1428 |
+
cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl;
|
| 1429 |
+
}
|
| 1430 |
+
if (dumpMixedWeights) {
|
| 1431 |
+
if (mix && rank == 0 && !weightDumpStem.empty()) {
|
| 1432 |
+
// dump mixed weights instead of average weights
|
| 1433 |
+
ostringstream filename;
|
| 1434 |
+
if (epoch < 10)
|
| 1435 |
+
filename << weightDumpStem << "_0" << epoch;
|
| 1436 |
+
else
|
| 1437 |
+
filename << weightDumpStem << "_" << epoch;
|
| 1438 |
+
|
| 1439 |
+
if (weightDumpFrequency > 1)
|
| 1440 |
+
filename << "_" << weightEpochDump;
|
| 1441 |
+
|
| 1442 |
+
cerr << "Dumping mixed weights during epoch " << epoch << " to " << filename.str() << endl << endl;
|
| 1443 |
+
mixedWeights.Save(filename.str());
|
| 1444 |
+
++weightEpochDump;
|
| 1445 |
+
}
|
| 1446 |
+
} else {
|
| 1447 |
+
if (evaluateModulo(shardPosition, dumping_base, actualBatchSize)) {
|
| 1448 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", dump weights.. (pos: " << shardPosition << ", base: " << dumping_base << ")" << endl;
|
| 1449 |
+
ScoreComponentCollection tmpAverageWeights(cumulativeWeights);
|
| 1450 |
+
bool proceed = false;
|
| 1451 |
+
if (accumulateWeights) {
|
| 1452 |
+
if (numberOfUpdates > 0) {
|
| 1453 |
+
tmpAverageWeights.DivideEquals(numberOfUpdates);
|
| 1454 |
+
proceed = true;
|
| 1455 |
+
}
|
| 1456 |
+
} else {
|
| 1457 |
+
if (numberOfUpdatesThisEpoch > 0) {
|
| 1458 |
+
if (sparseNoAverage) // average only core weights
|
| 1459 |
+
tmpAverageWeights.CoreDivideEquals(numberOfUpdatesThisEpoch);
|
| 1460 |
+
else if (sparseAverage)
|
| 1461 |
+
tmpAverageWeights.DivideEquals(cumulativeWeightsBinary);
|
| 1462 |
+
else
|
| 1463 |
+
tmpAverageWeights.DivideEquals(numberOfUpdatesThisEpoch);
|
| 1464 |
+
proceed = true;
|
| 1465 |
+
}
|
| 1466 |
+
}
|
| 1467 |
+
|
| 1468 |
+
if (proceed) {
|
| 1469 |
+
#ifdef MPI_ENABLE
|
| 1470 |
+
// average across processes
|
| 1471 |
+
mpi::reduce(world, tmpAverageWeights, mixedAverageWeights, SCCPlus(), 0);
|
| 1472 |
+
ScoreComponentCollection totalBinary;
|
| 1473 |
+
if (sparseAverage) {
|
| 1474 |
+
ScoreComponentCollection binary;
|
| 1475 |
+
binary.SetToBinaryOf(mosesWeights);
|
| 1476 |
+
mpi::reduce(world, binary, totalBinary, SCCPlus(), 0);
|
| 1477 |
+
}
|
| 1478 |
+
#endif
|
| 1479 |
+
#ifndef MPI_ENABLE
|
| 1480 |
+
mixedAverageWeights = tmpAverageWeights;
|
| 1481 |
+
//FIXME: What do to for non-mpi version
|
| 1482 |
+
ScoreComponentCollection totalBinary;
|
| 1483 |
+
#endif
|
| 1484 |
+
if (rank == 0 && !weightDumpStem.empty()) {
|
| 1485 |
+
// divide by number of processes
|
| 1486 |
+
if (sparseNoAverage)
|
| 1487 |
+
mixedAverageWeights.CoreDivideEquals(size); // average only core weights
|
| 1488 |
+
else if (sparseAverage)
|
| 1489 |
+
mixedAverageWeights.DivideEquals(totalBinary);
|
| 1490 |
+
else
|
| 1491 |
+
mixedAverageWeights.DivideEquals(size);
|
| 1492 |
+
|
| 1493 |
+
// normalise weights after averaging
|
| 1494 |
+
if (normaliseWeights) {
|
| 1495 |
+
mixedAverageWeights.L1Normalise();
|
| 1496 |
+
}
|
| 1497 |
+
|
| 1498 |
+
// dump final average weights
|
| 1499 |
+
ostringstream filename;
|
| 1500 |
+
if (epoch < 10) {
|
| 1501 |
+
filename << weightDumpStem << "_0" << epoch;
|
| 1502 |
+
} else {
|
| 1503 |
+
filename << weightDumpStem << "_" << epoch;
|
| 1504 |
+
}
|
| 1505 |
+
|
| 1506 |
+
if (weightDumpFrequency > 1) {
|
| 1507 |
+
filename << "_" << weightEpochDump;
|
| 1508 |
+
}
|
| 1509 |
+
|
| 1510 |
+
/*if (accumulateWeights) {
|
| 1511 |
+
cerr << "\nMixed average weights (cumulative) during epoch " << epoch << ": " << mixedAverageWeights << endl;
|
| 1512 |
+
} else {
|
| 1513 |
+
cerr << "\nMixed average weights during epoch " << epoch << ": " << mixedAverageWeights << endl;
|
| 1514 |
+
}*/
|
| 1515 |
+
|
| 1516 |
+
cerr << "Dumping mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl;
|
| 1517 |
+
mixedAverageWeights.Save(filename.str());
|
| 1518 |
+
++weightEpochDump;
|
| 1519 |
+
|
| 1520 |
+
if (weightEpochDump == weightDumpFrequency) {
|
| 1521 |
+
if (l1_regularize) {
|
| 1522 |
+
size_t pruned = mixedAverageWeights.SparseL1Regularize(l1_lambda);
|
| 1523 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1524 |
+
<< "l1-reg. on mixedAverageWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl;
|
| 1525 |
+
|
| 1526 |
+
}
|
| 1527 |
+
if (l2_regularize) {
|
| 1528 |
+
mixedAverageWeights.SparseL2Regularize(l2_lambda);
|
| 1529 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", "
|
| 1530 |
+
<< "l2-reg. on mixedAverageWeights with lambda=" << l2_lambda << endl;
|
| 1531 |
+
}
|
| 1532 |
+
|
| 1533 |
+
if (l1_regularize || l2_regularize) {
|
| 1534 |
+
filename << "_reg";
|
| 1535 |
+
cerr << "Dumping regularized mixed average weights during epoch " << epoch << " to " << filename.str() << endl << endl;
|
| 1536 |
+
mixedAverageWeights.Save(filename.str());
|
| 1537 |
+
}
|
| 1538 |
+
}
|
| 1539 |
+
|
| 1540 |
+
if (weightEpochDump == weightDumpFrequency && printFeatureCounts) {
|
| 1541 |
+
// print out all features with counts
|
| 1542 |
+
stringstream s1, s2;
|
| 1543 |
+
s1 << "sparse_feature_hope_counts" << "_" << epoch;
|
| 1544 |
+
s2 << "sparse_feature_fear_counts" << "_" << epoch;
|
| 1545 |
+
ofstream sparseFeatureCountsHope(s1.str().c_str());
|
| 1546 |
+
ofstream sparseFeatureCountsFear(s2.str().c_str());
|
| 1547 |
+
|
| 1548 |
+
mixedAverageWeights.PrintSparseHopeFeatureCounts(sparseFeatureCountsHope);
|
| 1549 |
+
mixedAverageWeights.PrintSparseFearFeatureCounts(sparseFeatureCountsFear);
|
| 1550 |
+
sparseFeatureCountsHope.close();
|
| 1551 |
+
sparseFeatureCountsFear.close();
|
| 1552 |
+
}
|
| 1553 |
+
}
|
| 1554 |
+
}
|
| 1555 |
+
}// end dumping
|
| 1556 |
+
} // end if dump
|
| 1557 |
+
} // end of shard loop, end of this epoch
|
| 1558 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", end of epoch.." << endl;
|
| 1559 |
+
|
| 1560 |
+
if (historyBleu || simpleHistoryBleu) {
|
| 1561 |
+
cerr << "Bleu feature history after epoch " << epoch << endl;
|
| 1562 |
+
decoder->printBleuFeatureHistory(cerr);
|
| 1563 |
+
}
|
| 1564 |
+
// cerr << "Rank " << rank << ", epoch " << epoch << ", sum of violated constraints: " << sumStillViolatedConstraints << endl;
|
| 1565 |
+
|
| 1566 |
+
// Check whether there were any weight updates during this epoch
|
| 1567 |
+
size_t sumUpdates;
|
| 1568 |
+
size_t *sendbuf_uint, *recvbuf_uint;
|
| 1569 |
+
sendbuf_uint = (size_t *) malloc(sizeof(size_t));
|
| 1570 |
+
recvbuf_uint = (size_t *) malloc(sizeof(size_t));
|
| 1571 |
+
#ifdef MPI_ENABLE
|
| 1572 |
+
sendbuf_uint[0] = numberOfUpdatesThisEpoch;
|
| 1573 |
+
recvbuf_uint[0] = 0;
|
| 1574 |
+
MPI_Reduce(sendbuf_uint, recvbuf_uint, 1, MPI_UNSIGNED, MPI_SUM, 0, world);
|
| 1575 |
+
sumUpdates = recvbuf_uint[0];
|
| 1576 |
+
#endif
|
| 1577 |
+
#ifndef MPI_ENABLE
|
| 1578 |
+
sumUpdates = numberOfUpdatesThisEpoch;
|
| 1579 |
+
#endif
|
| 1580 |
+
if (rank == 0 && sumUpdates == 0) {
|
| 1581 |
+
cerr << "\nNo weight updates during this epoch.. stopping." << endl;
|
| 1582 |
+
stop = true;
|
| 1583 |
+
#ifdef MPI_ENABLE
|
| 1584 |
+
mpi::broadcast(world, stop, 0);
|
| 1585 |
+
#endif
|
| 1586 |
+
}
|
| 1587 |
+
|
| 1588 |
+
if (!stop) {
|
| 1589 |
+
// Test if weights have converged
|
| 1590 |
+
if (weightConvergence) {
|
| 1591 |
+
bool reached = true;
|
| 1592 |
+
if (rank == 0 && (epoch >= 2)) {
|
| 1593 |
+
ScoreComponentCollection firstDiff, secondDiff;
|
| 1594 |
+
if (dumpMixedWeights) {
|
| 1595 |
+
firstDiff = mixedWeights;
|
| 1596 |
+
firstDiff.MinusEquals(mixedWeightsPrevious);
|
| 1597 |
+
secondDiff = mixedWeights;
|
| 1598 |
+
secondDiff.MinusEquals(mixedWeightsBeforePrevious);
|
| 1599 |
+
} else {
|
| 1600 |
+
firstDiff = mixedAverageWeights;
|
| 1601 |
+
firstDiff.MinusEquals(mixedAverageWeightsPrevious);
|
| 1602 |
+
secondDiff = mixedAverageWeights;
|
| 1603 |
+
secondDiff.MinusEquals(mixedAverageWeightsBeforePrevious);
|
| 1604 |
+
}
|
| 1605 |
+
VERBOSE(1, "Average weight changes since previous epoch: " << firstDiff << " (max: " << firstDiff.GetLInfNorm() << ")" << endl);
|
| 1606 |
+
VERBOSE(1, "Average weight changes since before previous epoch: " << secondDiff << " (max: " << secondDiff.GetLInfNorm() << ")" << endl << endl);
|
| 1607 |
+
|
| 1608 |
+
// check whether stopping criterion has been reached
|
| 1609 |
+
// (both difference vectors must have all weight changes smaller than min_weight_change)
|
| 1610 |
+
if (firstDiff.GetLInfNorm() >= min_weight_change)
|
| 1611 |
+
reached = false;
|
| 1612 |
+
if (secondDiff.GetLInfNorm() >= min_weight_change)
|
| 1613 |
+
reached = false;
|
| 1614 |
+
if (reached) {
|
| 1615 |
+
// stop MIRA
|
| 1616 |
+
stop = true;
|
| 1617 |
+
cerr << "\nWeights have converged after epoch " << epoch << ".. stopping MIRA." << endl;
|
| 1618 |
+
ScoreComponentCollection dummy;
|
| 1619 |
+
ostringstream endfilename;
|
| 1620 |
+
endfilename << "stopping";
|
| 1621 |
+
dummy.Save(endfilename.str());
|
| 1622 |
+
}
|
| 1623 |
+
}
|
| 1624 |
+
|
| 1625 |
+
mixedWeightsBeforePrevious = mixedWeightsPrevious;
|
| 1626 |
+
mixedWeightsPrevious = mixedWeights;
|
| 1627 |
+
mixedAverageWeightsBeforePrevious = mixedAverageWeightsPrevious;
|
| 1628 |
+
mixedAverageWeightsPrevious = mixedAverageWeights;
|
| 1629 |
+
#ifdef MPI_ENABLE
|
| 1630 |
+
mpi::broadcast(world, stop, 0);
|
| 1631 |
+
#endif
|
| 1632 |
+
} //end if (weightConvergence)
|
| 1633 |
+
}
|
| 1634 |
+
} // end of epoch loop
|
| 1635 |
+
|
| 1636 |
+
#ifdef MPI_ENABLE
|
| 1637 |
+
MPI_Finalize();
|
| 1638 |
+
#endif
|
| 1639 |
+
|
| 1640 |
+
time(&now);
|
| 1641 |
+
cerr << "Rank " << rank << ", " << ctime(&now);
|
| 1642 |
+
|
| 1643 |
+
if (rank == 0) {
|
| 1644 |
+
ScoreComponentCollection dummy;
|
| 1645 |
+
ostringstream endfilename;
|
| 1646 |
+
endfilename << "finished";
|
| 1647 |
+
dummy.Save(endfilename.str());
|
| 1648 |
+
}
|
| 1649 |
+
|
| 1650 |
+
delete decoder;
|
| 1651 |
+
exit(0);
|
| 1652 |
+
}
|
| 1653 |
+
|
| 1654 |
+
bool loadSentences(const string& filename, vector<string>& sentences)
|
| 1655 |
+
{
|
| 1656 |
+
ifstream in(filename.c_str());
|
| 1657 |
+
if (!in)
|
| 1658 |
+
return false;
|
| 1659 |
+
string line;
|
| 1660 |
+
while (getline(in, line))
|
| 1661 |
+
sentences.push_back(line);
|
| 1662 |
+
return true;
|
| 1663 |
+
}
|
| 1664 |
+
|
| 1665 |
+
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size)
|
| 1666 |
+
{
|
| 1667 |
+
if (mix_or_dump_base == 0) return 0;
|
| 1668 |
+
if (actual_batch_size > 1) {
|
| 1669 |
+
bool mix_or_dump = false;
|
| 1670 |
+
size_t numberSubtracts = actual_batch_size;
|
| 1671 |
+
do {
|
| 1672 |
+
if (shard_position % mix_or_dump_base == 0) {
|
| 1673 |
+
mix_or_dump = true;
|
| 1674 |
+
break;
|
| 1675 |
+
}
|
| 1676 |
+
--shard_position;
|
| 1677 |
+
--numberSubtracts;
|
| 1678 |
+
} while (numberSubtracts > 0);
|
| 1679 |
+
return mix_or_dump;
|
| 1680 |
+
} else {
|
| 1681 |
+
return ((shard_position % mix_or_dump_base) == 0);
|
| 1682 |
+
}
|
| 1683 |
+
}
|
| 1684 |
+
|
| 1685 |
+
void printFeatureValues(vector<vector<ScoreComponentCollection> > &featureValues)
|
| 1686 |
+
{
|
| 1687 |
+
for (size_t i = 0; i < featureValues.size(); ++i) {
|
| 1688 |
+
for (size_t j = 0; j < featureValues[i].size(); ++j) {
|
| 1689 |
+
cerr << featureValues[i][j] << endl;
|
| 1690 |
+
}
|
| 1691 |
+
}
|
| 1692 |
+
cerr << endl;
|
| 1693 |
+
}
|
| 1694 |
+
|
| 1695 |
+
void deleteTranslations(vector<vector<const Word*> > &translations)
|
| 1696 |
+
{
|
| 1697 |
+
for (size_t i = 0; i < translations.size(); ++i) {
|
| 1698 |
+
for (size_t j = 0; j < translations[i].size(); ++j) {
|
| 1699 |
+
delete translations[i][j];
|
| 1700 |
+
}
|
| 1701 |
+
}
|
| 1702 |
+
}
|
| 1703 |
+
|
| 1704 |
+
void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename, vector<string> &inputSentences, MosesDecoder* decoder, size_t n, float bleuWeight)
|
| 1705 |
+
{
|
| 1706 |
+
if (decode == 1)
|
| 1707 |
+
cerr << "Rank " << rank << ", decoding dev input set according to hope objective.. " << endl;
|
| 1708 |
+
else if (decode == 2)
|
| 1709 |
+
cerr << "Rank " << rank << ", decoding dev input set according to fear objective.. " << endl;
|
| 1710 |
+
else
|
| 1711 |
+
cerr << "Rank " << rank << ", decoding dev input set according to normal objective.. " << endl;
|
| 1712 |
+
|
| 1713 |
+
// Create shards according to the number of processes used
|
| 1714 |
+
vector<size_t> order;
|
| 1715 |
+
for (size_t i = 0; i < inputSentences.size(); ++i)
|
| 1716 |
+
order.push_back(i);
|
| 1717 |
+
|
| 1718 |
+
vector<size_t> shard;
|
| 1719 |
+
float shardSize = (float) (order.size()) / size;
|
| 1720 |
+
size_t shardStart = (size_t) (shardSize * rank);
|
| 1721 |
+
size_t shardEnd = (size_t) (shardSize * (rank + 1));
|
| 1722 |
+
if (rank == size - 1) {
|
| 1723 |
+
shardEnd = inputSentences.size();
|
| 1724 |
+
shardSize = shardEnd - shardStart;
|
| 1725 |
+
}
|
| 1726 |
+
VERBOSE(1, "Rank " << rank << ", shard start: " << shardStart << " Shard end: " << shardEnd << endl);
|
| 1727 |
+
VERBOSE(1, "Rank " << rank << ", shard size: " << shardSize << endl);
|
| 1728 |
+
shard.resize(shardSize);
|
| 1729 |
+
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
| 1730 |
+
|
| 1731 |
+
// open files for writing
|
| 1732 |
+
stringstream fname;
|
| 1733 |
+
fname << filename << ".rank" << rank;
|
| 1734 |
+
filename = fname.str();
|
| 1735 |
+
ostringstream filename_nbest;
|
| 1736 |
+
filename_nbest << filename << "." << n << "best";
|
| 1737 |
+
ofstream out(filename.c_str());
|
| 1738 |
+
ofstream nbest_out((filename_nbest.str()).c_str());
|
| 1739 |
+
if (!out) {
|
| 1740 |
+
ostringstream msg;
|
| 1741 |
+
msg << "Unable to open " << fname.str();
|
| 1742 |
+
throw runtime_error(msg.str());
|
| 1743 |
+
}
|
| 1744 |
+
if (!nbest_out) {
|
| 1745 |
+
ostringstream msg;
|
| 1746 |
+
msg << "Unable to open " << filename_nbest;
|
| 1747 |
+
throw runtime_error(msg.str());
|
| 1748 |
+
}
|
| 1749 |
+
|
| 1750 |
+
for (size_t i = 0; i < shard.size(); ++i) {
|
| 1751 |
+
size_t sid = shard[i];
|
| 1752 |
+
string& input = inputSentences[sid];
|
| 1753 |
+
|
| 1754 |
+
vector<vector<ScoreComponentCollection> > dummyFeatureValues;
|
| 1755 |
+
vector<vector<float> > dummyBleuScores;
|
| 1756 |
+
vector<vector<float> > dummyModelScores;
|
| 1757 |
+
|
| 1758 |
+
vector<ScoreComponentCollection> newFeatureValues;
|
| 1759 |
+
vector<float> newScores;
|
| 1760 |
+
dummyFeatureValues.push_back(newFeatureValues);
|
| 1761 |
+
dummyBleuScores.push_back(newScores);
|
| 1762 |
+
dummyModelScores.push_back(newScores);
|
| 1763 |
+
|
| 1764 |
+
float factor = 0.0;
|
| 1765 |
+
if (decode == 1) factor = 1.0;
|
| 1766 |
+
if (decode == 2) factor = -1.0;
|
| 1767 |
+
cerr << "Rank " << rank << ", translating sentence " << sid << endl;
|
| 1768 |
+
bool realBleu = false;
|
| 1769 |
+
vector< vector<const Word*> > nbestOutput = decoder->getNBest(input, sid, n, factor, bleuWeight, dummyFeatureValues[0],
|
| 1770 |
+
dummyBleuScores[0], dummyModelScores[0], n, realBleu, true, false, rank, 0, "");
|
| 1771 |
+
cerr << endl;
|
| 1772 |
+
decoder->cleanup(StaticData::Instance().IsChart());
|
| 1773 |
+
|
| 1774 |
+
for (size_t i = 0; i < nbestOutput.size(); ++i) {
|
| 1775 |
+
vector<const Word*> output = nbestOutput[i];
|
| 1776 |
+
stringstream translation;
|
| 1777 |
+
for (size_t k = 0; k < output.size(); ++k) {
|
| 1778 |
+
Word* w = const_cast<Word*>(output[k]);
|
| 1779 |
+
translation << w->GetString(0);
|
| 1780 |
+
translation << " ";
|
| 1781 |
+
}
|
| 1782 |
+
|
| 1783 |
+
if (i == 0)
|
| 1784 |
+
out << translation.str() << endl;
|
| 1785 |
+
nbest_out << sid << " ||| " << translation.str() << " ||| " << dummyFeatureValues[0][i] <<
|
| 1786 |
+
" ||| " << dummyModelScores[0][i] << " ||| sBleu=" << dummyBleuScores[0][i] << endl;
|
| 1787 |
+
}
|
| 1788 |
+
}
|
| 1789 |
+
|
| 1790 |
+
out.close();
|
| 1791 |
+
nbest_out.close();
|
| 1792 |
+
cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl;
|
| 1793 |
+
|
| 1794 |
+
#ifdef MPI_ENABLE
|
| 1795 |
+
MPI_Finalize();
|
| 1796 |
+
#endif
|
| 1797 |
+
|
| 1798 |
+
time_t now;
|
| 1799 |
+
time(&now);
|
| 1800 |
+
cerr << "Rank " << rank << ", " << ctime(&now);
|
| 1801 |
+
|
| 1802 |
+
delete decoder;
|
| 1803 |
+
exit(0);
|
| 1804 |
+
}
|
| 1805 |
+
|
| 1806 |
+
void applyLearningRates(vector<vector<ScoreComponentCollection> > &featureValues, float core_r0, float sparse_r0)
|
| 1807 |
+
{
|
| 1808 |
+
for (size_t i=0; i<featureValues.size(); ++i) // each item in batch
|
| 1809 |
+
for (size_t j=0; j<featureValues[i].size(); ++j) // each item in nbest
|
| 1810 |
+
featureValues[i][j].MultiplyEquals(core_r0, sparse_r0);
|
| 1811 |
+
}
|
| 1812 |
+
|
| 1813 |
+
void applyPerFeatureLearningRates(vector<vector<ScoreComponentCollection> > &featureValues, ScoreComponentCollection featureLearningRates, float sparse_r0)
|
| 1814 |
+
{
|
| 1815 |
+
for (size_t i=0; i<featureValues.size(); ++i) // each item in batch
|
| 1816 |
+
for (size_t j=0; j<featureValues[i].size(); ++j) // each item in nbest
|
| 1817 |
+
featureValues[i][j].MultiplyEqualsBackoff(featureLearningRates, sparse_r0);
|
| 1818 |
+
}
|
| 1819 |
+
|
| 1820 |
+
void scaleFeatureScore(const FeatureFunction *sp, float scaling_factor, vector<vector<ScoreComponentCollection> > &featureValues, size_t rank, size_t epoch)
|
| 1821 |
+
{
|
| 1822 |
+
string name = sp->GetScoreProducerDescription();
|
| 1823 |
+
|
| 1824 |
+
// scale down score
|
| 1825 |
+
float featureScore;
|
| 1826 |
+
for (size_t i=0; i<featureValues.size(); ++i) { // each item in batch
|
| 1827 |
+
for (size_t j=0; j<featureValues[i].size(); ++j) { // each item in nbest
|
| 1828 |
+
featureScore = featureValues[i][j].GetScoreForProducer(sp);
|
| 1829 |
+
featureValues[i][j].Assign(sp, featureScore*scaling_factor);
|
| 1830 |
+
//cerr << "Rank " << rank << ", epoch " << epoch << ", " << name << " score scaled from " << featureScore << " to " << featureScore/scaling_factor << endl;
|
| 1831 |
+
}
|
| 1832 |
+
}
|
| 1833 |
+
}
|
| 1834 |
+
|
| 1835 |
+
void scaleFeatureScores(const FeatureFunction *sp, float scaling_factor, vector<vector<ScoreComponentCollection> > &featureValues, size_t rank, size_t epoch)
|
| 1836 |
+
{
|
| 1837 |
+
string name = sp->GetScoreProducerDescription();
|
| 1838 |
+
|
| 1839 |
+
// scale down score
|
| 1840 |
+
for (size_t i=0; i<featureValues.size(); ++i) { // each item in batch
|
| 1841 |
+
for (size_t j=0; j<featureValues[i].size(); ++j) { // each item in nbest
|
| 1842 |
+
vector<float> featureScores = featureValues[i][j].GetScoresForProducer(sp);
|
| 1843 |
+
for (size_t k=0; k<featureScores.size(); ++k)
|
| 1844 |
+
featureScores[k] *= scaling_factor;
|
| 1845 |
+
featureValues[i][j].Assign(sp, featureScores);
|
| 1846 |
+
//cerr << "Rank " << rank << ", epoch " << epoch << ", " << name << " score scaled from " << featureScore << " to " << featureScore/scaling_factor << endl;
|
| 1847 |
+
}
|
| 1848 |
+
}
|
| 1849 |
+
}
|
mosesdecoder/contrib/mira/Perceptron.cpp
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - factored phrase-based language decoder
|
| 3 |
+
Copyright (C) 2010 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "Optimiser.h"
|
| 21 |
+
|
| 22 |
+
using namespace Moses;
|
| 23 |
+
using namespace std;
|
| 24 |
+
|
| 25 |
+
namespace Mira
|
| 26 |
+
{
|
| 27 |
+
|
| 28 |
+
size_t Perceptron::updateWeightsHopeFear(
|
| 29 |
+
ScoreComponentCollection& weightUpdate,
|
| 30 |
+
const vector< vector<ScoreComponentCollection> >& featureValuesHope,
|
| 31 |
+
const vector< vector<ScoreComponentCollection> >& featureValuesFear,
|
| 32 |
+
const vector< vector<float> >& dummy1,
|
| 33 |
+
const vector< vector<float> >& dummy2,
|
| 34 |
+
const vector< vector<float> >& dummy3,
|
| 35 |
+
const vector< vector<float> >& dummy4,
|
| 36 |
+
float perceptron_learning_rate,
|
| 37 |
+
size_t rank,
|
| 38 |
+
size_t epoch,
|
| 39 |
+
int updatePosition)
|
| 40 |
+
{
|
| 41 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", hope: " << featureValuesHope[0][0] << endl;
|
| 42 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", fear: " << featureValuesFear[0][0] << endl;
|
| 43 |
+
ScoreComponentCollection featureValueDiff = featureValuesHope[0][0];
|
| 44 |
+
featureValueDiff.MinusEquals(featureValuesFear[0][0]);
|
| 45 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", hope - fear: " << featureValueDiff << endl;
|
| 46 |
+
featureValueDiff.MultiplyEquals(perceptron_learning_rate);
|
| 47 |
+
weightUpdate.PlusEquals(featureValueDiff);
|
| 48 |
+
cerr << "Rank " << rank << ", epoch " << epoch << ", update: " << featureValueDiff << endl;
|
| 49 |
+
return 0;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
}
|
| 53 |
+
|
mosesdecoder/contrib/mira/mira.xcodeproj/project.pbxproj
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// !$*UTF8*$!
|
| 2 |
+
{
|
| 3 |
+
archiveVersion = 1;
|
| 4 |
+
classes = {
|
| 5 |
+
};
|
| 6 |
+
objectVersion = 45;
|
| 7 |
+
objects = {
|
| 8 |
+
|
| 9 |
+
/* Begin PBXBuildFile section */
|
| 10 |
+
1E141A311243527800123194 /* Perceptron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E141A2F1243527800123194 /* Perceptron.cpp */; };
|
| 11 |
+
1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */; };
|
| 12 |
+
1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC6391242602F0059001A /* Decoder.cpp */; };
|
| 13 |
+
1E9DC63D1242602F0059001A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DC63B1242602F0059001A /* Main.cpp */; };
|
| 14 |
+
1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D1124268310059001A /* libmoses-chart.a */; };
|
| 15 |
+
1E9DC6DB124268510059001A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6CB124268270059001A /* libmoses.a */; };
|
| 16 |
+
1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DC6D9124268440059001A /* libOnDiskPt.a */; };
|
| 17 |
+
8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* mira.1 */; };
|
| 18 |
+
/* End PBXBuildFile section */
|
| 19 |
+
|
| 20 |
+
/* Begin PBXContainerItemProxy section */
|
| 21 |
+
1E9DC6CA124268270059001A /* PBXContainerItemProxy */ = {
|
| 22 |
+
isa = PBXContainerItemProxy;
|
| 23 |
+
containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */;
|
| 24 |
+
proxyType = 2;
|
| 25 |
+
remoteGlobalIDString = D2AAC046055464E500DB518D;
|
| 26 |
+
remoteInfo = moses;
|
| 27 |
+
};
|
| 28 |
+
1E9DC6D0124268310059001A /* PBXContainerItemProxy */ = {
|
| 29 |
+
isa = PBXContainerItemProxy;
|
| 30 |
+
containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */;
|
| 31 |
+
proxyType = 2;
|
| 32 |
+
remoteGlobalIDString = D2AAC046055464E500DB518D;
|
| 33 |
+
remoteInfo = "moses-chart";
|
| 34 |
+
};
|
| 35 |
+
1E9DC6D8124268440059001A /* PBXContainerItemProxy */ = {
|
| 36 |
+
isa = PBXContainerItemProxy;
|
| 37 |
+
containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */;
|
| 38 |
+
proxyType = 2;
|
| 39 |
+
remoteGlobalIDString = D2AAC046055464E500DB518D;
|
| 40 |
+
remoteInfo = OnDiskPt;
|
| 41 |
+
};
|
| 42 |
+
1EF4E84C12440612006233A0 /* PBXContainerItemProxy */ = {
|
| 43 |
+
isa = PBXContainerItemProxy;
|
| 44 |
+
containerPortal = 1E9DC6C6124268270059001A /* moses.xcodeproj */;
|
| 45 |
+
proxyType = 1;
|
| 46 |
+
remoteGlobalIDString = D2AAC045055464E500DB518D /* moses */;
|
| 47 |
+
remoteInfo = moses;
|
| 48 |
+
};
|
| 49 |
+
1EF4E84E12440612006233A0 /* PBXContainerItemProxy */ = {
|
| 50 |
+
isa = PBXContainerItemProxy;
|
| 51 |
+
containerPortal = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */;
|
| 52 |
+
proxyType = 1;
|
| 53 |
+
remoteGlobalIDString = D2AAC045055464E500DB518D /* moses-chart */;
|
| 54 |
+
remoteInfo = "moses-chart";
|
| 55 |
+
};
|
| 56 |
+
1EF4E85012440612006233A0 /* PBXContainerItemProxy */ = {
|
| 57 |
+
isa = PBXContainerItemProxy;
|
| 58 |
+
containerPortal = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */;
|
| 59 |
+
proxyType = 1;
|
| 60 |
+
remoteGlobalIDString = D2AAC045055464E500DB518D /* OnDiskPt */;
|
| 61 |
+
remoteInfo = OnDiskPt;
|
| 62 |
+
};
|
| 63 |
+
/* End PBXContainerItemProxy section */
|
| 64 |
+
|
| 65 |
+
/* Begin PBXCopyFilesBuildPhase section */
|
| 66 |
+
8DD76F690486A84900D96B5E /* CopyFiles */ = {
|
| 67 |
+
isa = PBXCopyFilesBuildPhase;
|
| 68 |
+
buildActionMask = 8;
|
| 69 |
+
dstPath = /usr/share/man/man1/;
|
| 70 |
+
dstSubfolderSpec = 0;
|
| 71 |
+
files = (
|
| 72 |
+
8DD76F6A0486A84900D96B5E /* mira.1 in CopyFiles */,
|
| 73 |
+
);
|
| 74 |
+
runOnlyForDeploymentPostprocessing = 1;
|
| 75 |
+
};
|
| 76 |
+
/* End PBXCopyFilesBuildPhase section */
|
| 77 |
+
|
| 78 |
+
/* Begin PBXFileReference section */
|
| 79 |
+
1E141A2F1243527800123194 /* Perceptron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Perceptron.cpp; sourceTree = "<group>"; };
|
| 80 |
+
1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MiraOptimiser.cpp; sourceTree = "<group>"; };
|
| 81 |
+
1E9DC6391242602F0059001A /* Decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Decoder.cpp; sourceTree = "<group>"; };
|
| 82 |
+
1E9DC63A1242602F0059001A /* Decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Decoder.h; sourceTree = "<group>"; };
|
| 83 |
+
1E9DC63B1242602F0059001A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Main.cpp; sourceTree = "<group>"; };
|
| 84 |
+
1E9DC63E124260370059001A /* Optimiser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Optimiser.h; sourceTree = "<group>"; };
|
| 85 |
+
1E9DC6C6124268270059001A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; };
|
| 86 |
+
1E9DC6CC124268310059001A /* moses-chart.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = "moses-chart.xcodeproj"; path = "../moses-chart/moses-chart.xcodeproj"; sourceTree = SOURCE_ROOT; };
|
| 87 |
+
1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; };
|
| 88 |
+
1E9DC76712426FC60059001A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Main.h; sourceTree = "<group>"; };
|
| 89 |
+
8DD76F6C0486A84900D96B5E /* mira */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mira; sourceTree = BUILT_PRODUCTS_DIR; };
|
| 90 |
+
C6859E8B029090EE04C91782 /* mira.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mira.1; sourceTree = "<group>"; };
|
| 91 |
+
/* End PBXFileReference section */
|
| 92 |
+
|
| 93 |
+
/* Begin PBXFrameworksBuildPhase section */
|
| 94 |
+
8DD76F660486A84900D96B5E /* Frameworks */ = {
|
| 95 |
+
isa = PBXFrameworksBuildPhase;
|
| 96 |
+
buildActionMask = 2147483647;
|
| 97 |
+
files = (
|
| 98 |
+
1E9DC6DC124268580059001A /* libOnDiskPt.a in Frameworks */,
|
| 99 |
+
1E9DC6DB124268510059001A /* libmoses.a in Frameworks */,
|
| 100 |
+
1E9DC6DA1242684C0059001A /* libmoses-chart.a in Frameworks */,
|
| 101 |
+
);
|
| 102 |
+
runOnlyForDeploymentPostprocessing = 0;
|
| 103 |
+
};
|
| 104 |
+
/* End PBXFrameworksBuildPhase section */
|
| 105 |
+
|
| 106 |
+
/* Begin PBXGroup section */
|
| 107 |
+
08FB7794FE84155DC02AAC07 /* mira */ = {
|
| 108 |
+
isa = PBXGroup;
|
| 109 |
+
children = (
|
| 110 |
+
1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */,
|
| 111 |
+
1E9DC6CC124268310059001A /* moses-chart.xcodeproj */,
|
| 112 |
+
1E9DC6C6124268270059001A /* moses.xcodeproj */,
|
| 113 |
+
08FB7795FE84155DC02AAC07 /* Source */,
|
| 114 |
+
C6859E8C029090F304C91782 /* Documentation */,
|
| 115 |
+
1AB674ADFE9D54B511CA2CBB /* Products */,
|
| 116 |
+
);
|
| 117 |
+
name = mira;
|
| 118 |
+
sourceTree = "<group>";
|
| 119 |
+
};
|
| 120 |
+
08FB7795FE84155DC02AAC07 /* Source */ = {
|
| 121 |
+
isa = PBXGroup;
|
| 122 |
+
children = (
|
| 123 |
+
1E56EBF41243B91600E8315C /* MiraOptimiser.cpp */,
|
| 124 |
+
1E141A2F1243527800123194 /* Perceptron.cpp */,
|
| 125 |
+
1E9DC63E124260370059001A /* Optimiser.h */,
|
| 126 |
+
1E9DC6391242602F0059001A /* Decoder.cpp */,
|
| 127 |
+
1E9DC63A1242602F0059001A /* Decoder.h */,
|
| 128 |
+
1E9DC63B1242602F0059001A /* Main.cpp */,
|
| 129 |
+
1E9DC76712426FC60059001A /* Main.h */,
|
| 130 |
+
);
|
| 131 |
+
name = Source;
|
| 132 |
+
sourceTree = "<group>";
|
| 133 |
+
};
|
| 134 |
+
1AB674ADFE9D54B511CA2CBB /* Products */ = {
|
| 135 |
+
isa = PBXGroup;
|
| 136 |
+
children = (
|
| 137 |
+
8DD76F6C0486A84900D96B5E /* mira */,
|
| 138 |
+
);
|
| 139 |
+
name = Products;
|
| 140 |
+
sourceTree = "<group>";
|
| 141 |
+
};
|
| 142 |
+
1E9DC6C7124268270059001A /* Products */ = {
|
| 143 |
+
isa = PBXGroup;
|
| 144 |
+
children = (
|
| 145 |
+
1E9DC6CB124268270059001A /* libmoses.a */,
|
| 146 |
+
);
|
| 147 |
+
name = Products;
|
| 148 |
+
sourceTree = "<group>";
|
| 149 |
+
};
|
| 150 |
+
1E9DC6CD124268310059001A /* Products */ = {
|
| 151 |
+
isa = PBXGroup;
|
| 152 |
+
children = (
|
| 153 |
+
1E9DC6D1124268310059001A /* libmoses-chart.a */,
|
| 154 |
+
);
|
| 155 |
+
name = Products;
|
| 156 |
+
sourceTree = "<group>";
|
| 157 |
+
};
|
| 158 |
+
1E9DC6D5124268440059001A /* Products */ = {
|
| 159 |
+
isa = PBXGroup;
|
| 160 |
+
children = (
|
| 161 |
+
1E9DC6D9124268440059001A /* libOnDiskPt.a */,
|
| 162 |
+
);
|
| 163 |
+
name = Products;
|
| 164 |
+
sourceTree = "<group>";
|
| 165 |
+
};
|
| 166 |
+
C6859E8C029090F304C91782 /* Documentation */ = {
|
| 167 |
+
isa = PBXGroup;
|
| 168 |
+
children = (
|
| 169 |
+
C6859E8B029090EE04C91782 /* mira.1 */,
|
| 170 |
+
);
|
| 171 |
+
name = Documentation;
|
| 172 |
+
sourceTree = "<group>";
|
| 173 |
+
};
|
| 174 |
+
/* End PBXGroup section */
|
| 175 |
+
|
| 176 |
+
/* Begin PBXNativeTarget section */
|
| 177 |
+
8DD76F620486A84900D96B5E /* mira */ = {
|
| 178 |
+
isa = PBXNativeTarget;
|
| 179 |
+
buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */;
|
| 180 |
+
buildPhases = (
|
| 181 |
+
8DD76F640486A84900D96B5E /* Sources */,
|
| 182 |
+
8DD76F660486A84900D96B5E /* Frameworks */,
|
| 183 |
+
8DD76F690486A84900D96B5E /* CopyFiles */,
|
| 184 |
+
);
|
| 185 |
+
buildRules = (
|
| 186 |
+
);
|
| 187 |
+
dependencies = (
|
| 188 |
+
1EF4E84D12440612006233A0 /* PBXTargetDependency */,
|
| 189 |
+
1EF4E84F12440612006233A0 /* PBXTargetDependency */,
|
| 190 |
+
1EF4E85112440612006233A0 /* PBXTargetDependency */,
|
| 191 |
+
);
|
| 192 |
+
name = mira;
|
| 193 |
+
productInstallPath = "$(HOME)/bin";
|
| 194 |
+
productName = mira;
|
| 195 |
+
productReference = 8DD76F6C0486A84900D96B5E /* mira */;
|
| 196 |
+
productType = "com.apple.product-type.tool";
|
| 197 |
+
};
|
| 198 |
+
/* End PBXNativeTarget section */
|
| 199 |
+
|
| 200 |
+
/* Begin PBXProject section */
|
| 201 |
+
08FB7793FE84155DC02AAC07 /* Project object */ = {
|
| 202 |
+
isa = PBXProject;
|
| 203 |
+
buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */;
|
| 204 |
+
compatibilityVersion = "Xcode 3.1";
|
| 205 |
+
hasScannedForEncodings = 1;
|
| 206 |
+
mainGroup = 08FB7794FE84155DC02AAC07 /* mira */;
|
| 207 |
+
projectDirPath = "";
|
| 208 |
+
projectReferences = (
|
| 209 |
+
{
|
| 210 |
+
ProductGroup = 1E9DC6CD124268310059001A /* Products */;
|
| 211 |
+
ProjectRef = 1E9DC6CC124268310059001A /* moses-chart.xcodeproj */;
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
ProductGroup = 1E9DC6C7124268270059001A /* Products */;
|
| 215 |
+
ProjectRef = 1E9DC6C6124268270059001A /* moses.xcodeproj */;
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
ProductGroup = 1E9DC6D5124268440059001A /* Products */;
|
| 219 |
+
ProjectRef = 1E9DC6D4124268440059001A /* OnDiskPt.xcodeproj */;
|
| 220 |
+
},
|
| 221 |
+
);
|
| 222 |
+
projectRoot = "";
|
| 223 |
+
targets = (
|
| 224 |
+
8DD76F620486A84900D96B5E /* mira */,
|
| 225 |
+
);
|
| 226 |
+
};
|
| 227 |
+
/* End PBXProject section */
|
| 228 |
+
|
| 229 |
+
/* Begin PBXReferenceProxy section */
|
| 230 |
+
1E9DC6CB124268270059001A /* libmoses.a */ = {
|
| 231 |
+
isa = PBXReferenceProxy;
|
| 232 |
+
fileType = archive.ar;
|
| 233 |
+
path = libmoses.a;
|
| 234 |
+
remoteRef = 1E9DC6CA124268270059001A /* PBXContainerItemProxy */;
|
| 235 |
+
sourceTree = BUILT_PRODUCTS_DIR;
|
| 236 |
+
};
|
| 237 |
+
1E9DC6D1124268310059001A /* libmoses-chart.a */ = {
|
| 238 |
+
isa = PBXReferenceProxy;
|
| 239 |
+
fileType = archive.ar;
|
| 240 |
+
path = "libmoses-chart.a";
|
| 241 |
+
remoteRef = 1E9DC6D0124268310059001A /* PBXContainerItemProxy */;
|
| 242 |
+
sourceTree = BUILT_PRODUCTS_DIR;
|
| 243 |
+
};
|
| 244 |
+
1E9DC6D9124268440059001A /* libOnDiskPt.a */ = {
|
| 245 |
+
isa = PBXReferenceProxy;
|
| 246 |
+
fileType = archive.ar;
|
| 247 |
+
path = libOnDiskPt.a;
|
| 248 |
+
remoteRef = 1E9DC6D8124268440059001A /* PBXContainerItemProxy */;
|
| 249 |
+
sourceTree = BUILT_PRODUCTS_DIR;
|
| 250 |
+
};
|
| 251 |
+
/* End PBXReferenceProxy section */
|
| 252 |
+
|
| 253 |
+
/* Begin PBXSourcesBuildPhase section */
|
| 254 |
+
8DD76F640486A84900D96B5E /* Sources */ = {
|
| 255 |
+
isa = PBXSourcesBuildPhase;
|
| 256 |
+
buildActionMask = 2147483647;
|
| 257 |
+
files = (
|
| 258 |
+
1E9DC63C1242602F0059001A /* Decoder.cpp in Sources */,
|
| 259 |
+
1E9DC63D1242602F0059001A /* Main.cpp in Sources */,
|
| 260 |
+
1E141A311243527800123194 /* Perceptron.cpp in Sources */,
|
| 261 |
+
1E56EBF51243B91600E8315C /* MiraOptimiser.cpp in Sources */,
|
| 262 |
+
);
|
| 263 |
+
runOnlyForDeploymentPostprocessing = 0;
|
| 264 |
+
};
|
| 265 |
+
/* End PBXSourcesBuildPhase section */
|
| 266 |
+
|
| 267 |
+
/* Begin PBXTargetDependency section */
|
| 268 |
+
1EF4E84D12440612006233A0 /* PBXTargetDependency */ = {
|
| 269 |
+
isa = PBXTargetDependency;
|
| 270 |
+
name = moses;
|
| 271 |
+
targetProxy = 1EF4E84C12440612006233A0 /* PBXContainerItemProxy */;
|
| 272 |
+
};
|
| 273 |
+
1EF4E84F12440612006233A0 /* PBXTargetDependency */ = {
|
| 274 |
+
isa = PBXTargetDependency;
|
| 275 |
+
name = "moses-chart";
|
| 276 |
+
targetProxy = 1EF4E84E12440612006233A0 /* PBXContainerItemProxy */;
|
| 277 |
+
};
|
| 278 |
+
1EF4E85112440612006233A0 /* PBXTargetDependency */ = {
|
| 279 |
+
isa = PBXTargetDependency;
|
| 280 |
+
name = OnDiskPt;
|
| 281 |
+
targetProxy = 1EF4E85012440612006233A0 /* PBXContainerItemProxy */;
|
| 282 |
+
};
|
| 283 |
+
/* End PBXTargetDependency section */
|
| 284 |
+
|
| 285 |
+
/* Begin XCBuildConfiguration section */
|
| 286 |
+
1DEB923208733DC60010E9CD /* Debug */ = {
|
| 287 |
+
isa = XCBuildConfiguration;
|
| 288 |
+
buildSettings = {
|
| 289 |
+
ALWAYS_SEARCH_USER_PATHS = NO;
|
| 290 |
+
COPY_PHASE_STRIP = NO;
|
| 291 |
+
GCC_DYNAMIC_NO_PIC = NO;
|
| 292 |
+
GCC_ENABLE_FIX_AND_CONTINUE = YES;
|
| 293 |
+
GCC_MODEL_TUNING = G5;
|
| 294 |
+
GCC_OPTIMIZATION_LEVEL = 0;
|
| 295 |
+
INSTALL_PATH = /usr/local/bin;
|
| 296 |
+
LIBRARY_SEARCH_PATHS = (
|
| 297 |
+
../irstlm/lib/i386,
|
| 298 |
+
../srilm/lib/macosx,
|
| 299 |
+
);
|
| 300 |
+
OTHER_LDFLAGS = (
|
| 301 |
+
"-lboost_program_options",
|
| 302 |
+
"-lz",
|
| 303 |
+
"-lirstlm",
|
| 304 |
+
"-lmisc",
|
| 305 |
+
"-ldstruct",
|
| 306 |
+
"-loolm",
|
| 307 |
+
"-lflm",
|
| 308 |
+
"-llattice",
|
| 309 |
+
);
|
| 310 |
+
PRODUCT_NAME = mira;
|
| 311 |
+
};
|
| 312 |
+
name = Debug;
|
| 313 |
+
};
|
| 314 |
+
1DEB923308733DC60010E9CD /* Release */ = {
|
| 315 |
+
isa = XCBuildConfiguration;
|
| 316 |
+
buildSettings = {
|
| 317 |
+
ALWAYS_SEARCH_USER_PATHS = NO;
|
| 318 |
+
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
| 319 |
+
GCC_MODEL_TUNING = G5;
|
| 320 |
+
INSTALL_PATH = /usr/local/bin;
|
| 321 |
+
LIBRARY_SEARCH_PATHS = (
|
| 322 |
+
../irstlm/lib/i386,
|
| 323 |
+
../srilm/lib/macosx,
|
| 324 |
+
);
|
| 325 |
+
OTHER_LDFLAGS = (
|
| 326 |
+
"-lboost_program_options",
|
| 327 |
+
"-lz",
|
| 328 |
+
"-lirstlm",
|
| 329 |
+
"-lmisc",
|
| 330 |
+
"-ldstruct",
|
| 331 |
+
"-loolm",
|
| 332 |
+
"-lflm",
|
| 333 |
+
"-llattice",
|
| 334 |
+
);
|
| 335 |
+
PRODUCT_NAME = mira;
|
| 336 |
+
};
|
| 337 |
+
name = Release;
|
| 338 |
+
};
|
| 339 |
+
1DEB923608733DC60010E9CD /* Debug */ = {
|
| 340 |
+
isa = XCBuildConfiguration;
|
| 341 |
+
buildSettings = {
|
| 342 |
+
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
|
| 343 |
+
GCC_C_LANGUAGE_STANDARD = gnu99;
|
| 344 |
+
GCC_OPTIMIZATION_LEVEL = 0;
|
| 345 |
+
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
| 346 |
+
GCC_WARN_UNUSED_VARIABLE = YES;
|
| 347 |
+
HEADER_SEARCH_PATHS = (
|
| 348 |
+
/usr/local/include,
|
| 349 |
+
"../moses-chart/src",
|
| 350 |
+
../moses/src,
|
| 351 |
+
../irstlm/include,
|
| 352 |
+
);
|
| 353 |
+
ONLY_ACTIVE_ARCH = YES;
|
| 354 |
+
PREBINDING = NO;
|
| 355 |
+
SDKROOT = macosx10.6;
|
| 356 |
+
};
|
| 357 |
+
name = Debug;
|
| 358 |
+
};
|
| 359 |
+
1DEB923708733DC60010E9CD /* Release */ = {
|
| 360 |
+
isa = XCBuildConfiguration;
|
| 361 |
+
buildSettings = {
|
| 362 |
+
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
|
| 363 |
+
GCC_C_LANGUAGE_STANDARD = gnu99;
|
| 364 |
+
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
| 365 |
+
GCC_WARN_UNUSED_VARIABLE = YES;
|
| 366 |
+
HEADER_SEARCH_PATHS = (
|
| 367 |
+
/usr/local/include,
|
| 368 |
+
"../moses-chart/src",
|
| 369 |
+
../moses/src,
|
| 370 |
+
../irstlm/include,
|
| 371 |
+
);
|
| 372 |
+
PREBINDING = NO;
|
| 373 |
+
SDKROOT = macosx10.6;
|
| 374 |
+
};
|
| 375 |
+
name = Release;
|
| 376 |
+
};
|
| 377 |
+
/* End XCBuildConfiguration section */
|
| 378 |
+
|
| 379 |
+
/* Begin XCConfigurationList section */
|
| 380 |
+
1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "mira" */ = {
|
| 381 |
+
isa = XCConfigurationList;
|
| 382 |
+
buildConfigurations = (
|
| 383 |
+
1DEB923208733DC60010E9CD /* Debug */,
|
| 384 |
+
1DEB923308733DC60010E9CD /* Release */,
|
| 385 |
+
);
|
| 386 |
+
defaultConfigurationIsVisible = 0;
|
| 387 |
+
defaultConfigurationName = Release;
|
| 388 |
+
};
|
| 389 |
+
1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "mira" */ = {
|
| 390 |
+
isa = XCConfigurationList;
|
| 391 |
+
buildConfigurations = (
|
| 392 |
+
1DEB923608733DC60010E9CD /* Debug */,
|
| 393 |
+
1DEB923708733DC60010E9CD /* Release */,
|
| 394 |
+
);
|
| 395 |
+
defaultConfigurationIsVisible = 0;
|
| 396 |
+
defaultConfigurationName = Release;
|
| 397 |
+
};
|
| 398 |
+
/* End XCConfigurationList section */
|
| 399 |
+
};
|
| 400 |
+
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
|
| 401 |
+
}
|
mosesdecoder/contrib/moses-speedtest/README.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Moses speedtesting framework
|
| 2 |
+
|
| 3 |
+
### Description
|
| 4 |
+
|
| 5 |
+
This is an automatic test framework that is designed to test the day to day performance changes in Moses.
|
| 6 |
+
|
| 7 |
+
### Set up
|
| 8 |
+
|
| 9 |
+
#### Set up a Moses repo
|
| 10 |
+
Set up a Moses repo and build it with the desired configuration.
|
| 11 |
+
```bash
|
| 12 |
+
git clone https://github.com/moses-smt/mosesdecoder.git
|
| 13 |
+
cd mosesdecoder
|
| 14 |
+
./bjam -j10 --with-cmph=/usr/include/
|
| 15 |
+
```
|
| 16 |
+
You need to build Moses first, so that the testsuite knows what command you want it to use when rebuilding against newer revisions.
|
| 17 |
+
|
| 18 |
+
#### Create a parent directory.
|
| 19 |
+
Create a parent directory where the **runtests.py** and related scripts and configuration file should reside.
|
| 20 |
+
This should also be the location of the TEST_DIR and TEST_LOG_DIR as explained in the next section.
|
| 21 |
+
|
| 22 |
+
#### Set up a global configuration file.
|
| 23 |
+
You need a configuration file for the testsuite. A sample configuration file is provided in **testsuite\_config**
|
| 24 |
+
<pre>
|
| 25 |
+
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
|
| 26 |
+
DROP_CACHES_COMM: sys_drop_caches 3
|
| 27 |
+
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
|
| 28 |
+
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
|
| 29 |
+
BASEBRANCH: RELEASE-2.1.1
|
| 30 |
+
MOSES_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-prof
|
| 31 |
+
MOSES_GOOGLE_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-gperftools
|
| 32 |
+
</pre>
|
| 33 |
+
|
| 34 |
+
The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses.
|
| 35 |
+
The _DROP\_CACHES\_COMM_ is the command that would be used to drop caches. It should run without needing root access.
|
| 36 |
+
_TEST\_DIR_ is the directory where all the tests will reside.
|
| 37 |
+
_TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time.
|
| 38 |
+
_BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release.
|
| 39 |
+
_MOSES\_PROFILER\_REPO_ is a path to a moses repository set up and built with profiling enabled. Optional if you want to produce profiling results.
|
| 40 |
+
_MOSES\_GOOGLE\_PROFILER\_REPO_ is a path to a Moses repository set up with full tcmalloc and profiler, as well as shared link for use with gperftools.
|
| 41 |
+
### Creating tests
|
| 42 |
+
|
| 43 |
+
In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test.
|
| 44 |
+
Inside that folder one should place a configuration file named **config**. The naming is mandatory.
|
| 45 |
+
An example such configuration file is **test\_config**
|
| 46 |
+
|
| 47 |
+
<pre>
|
| 48 |
+
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
|
| 49 |
+
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
|
| 50 |
+
Variants: vanilla, cached, ldpre, profile, google-profiler #Can't have cached without ldpre or vanilla
|
| 51 |
+
</pre>
|
| 52 |
+
|
| 53 |
+
The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths.
|
| 54 |
+
The _LDPRE:_ specifies if tests should be run with any LD\_PRELOAD flags.
|
| 55 |
+
The _Variants:_ line specifies what type of tests should we run. This particular line will run the following tests:
|
| 56 |
+
1. A Vanilla test meaning just the command after _Command_ will be issued.
|
| 57 |
+
2. A vanilla cached test meaning that after the vanilla test, the test will be run again without dropping caches in order to benchmark performance on cached filesystem.
|
| 58 |
+
3. A test with LD_PRELOAD ldpreloads moses -f command. For each available LDPRELOAD comma separated library to preload.
|
| 59 |
+
4. A cached version of all LD_PRELOAD tests.
|
| 60 |
+
5. A profile variant is only available if you have setup the profiler repository. It produces gprof outputs for all of the above in a subdirectory inside the _TEST\_LOG\_DIR.
|
| 61 |
+
|
| 62 |
+
#### Produce profiler results.
|
| 63 |
+
If you want to produce profiler results together in some tests you need to specify the _MOSES\_PROFILER\_REPO_ in the config
|
| 64 |
+
```bash
|
| 65 |
+
git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-profile
|
| 66 |
+
cd mosesdecoder-profile
|
| 67 |
+
./bjam -j10 --with-cmph=/usr/include/ variant=profile
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **gprof** output from every run (files ending in **\_profile**).
|
| 71 |
+
|
| 72 |
+
#### Produce google profiler results.
|
| 73 |
+
If you want to produce profiler results together in some tests you need to specify the _MOSES\_GOOGLE\_PROFILER\_REPO in the config
|
| 74 |
+
```bash
|
| 75 |
+
git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-google-profile
|
| 76 |
+
cd mosesdecoder
|
| 77 |
+
./bjam link=shared -j10 --full-tcmalloc --with-cmph=/usr/include/
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Afterwards for testcases which contain the **google-profiler** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR which contains the **google-profiler** output from every run (files prefixed with **pprof**). To analyze the output you need to use [pprof](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html).
|
| 81 |
+
|
| 82 |
+
### Running tests.
|
| 83 |
+
Running the tests is done through the **runtests.py** script.
|
| 84 |
+
|
| 85 |
+
#### Running all tests.
|
| 86 |
+
To run all tests, with the base branch and the latest revision (and generate new basebranch test data if such is missing) do a:
|
| 87 |
+
```bash
|
| 88 |
+
python3 runtests.py -c testsuite_config
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
#### Running specific tests.
|
| 92 |
+
The script allows the user to manually run a particular test or to test against a specific branch or revision:
|
| 93 |
+
<pre>
|
| 94 |
+
moses-speedtest@crom:~/phrase_tables$ python3 runtests.py --help
|
| 95 |
+
usage: runtests.py [-h] -c CONFIGFILE [-s SINGLETESTDIR] [-r REVISION]
|
| 96 |
+
[-b BRANCH]
|
| 97 |
+
|
| 98 |
+
A python based speedtest suite for moses.
|
| 99 |
+
|
| 100 |
+
optional arguments:
|
| 101 |
+
-h, --help show this help message and exit
|
| 102 |
+
-c CONFIGFILE, --configfile CONFIGFILE
|
| 103 |
+
Specify test config file
|
| 104 |
+
-s SINGLETESTDIR, --singletest SINGLETESTDIR
|
| 105 |
+
Single test name directory. Specify directory name,
|
| 106 |
+
not full path!
|
| 107 |
+
-r REVISION, --revision REVISION
|
| 108 |
+
Specify a specific revison for the test.
|
| 109 |
+
-b BRANCH, --branch BRANCH
|
| 110 |
+
Specify a branch for the test.
|
| 111 |
+
</pre>
|
| 112 |
+
|
| 113 |
+
### Generating HTML report.
|
| 114 |
+
To generate a summary of the test results use the **html\_gen.py** script. It places a file named *index.html* in the current script directory.
|
| 115 |
+
```bash
|
| 116 |
+
python3 html_gen.py testsuite_config
|
| 117 |
+
```
|
| 118 |
+
You should use the generated file with the **style.css** file provided in the html directory.
|
| 119 |
+
|
| 120 |
+
### Command line regression testing.
|
| 121 |
+
Alternatively you could check for regressions from the command line using the **check\_for\_regression.py** script:
|
| 122 |
+
```bash
|
| 123 |
+
python3 check_for_regression.py TESTLOGS_DIRECTORY
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
Alternatively, the results of all tests are logged inside the specified TESTLOGS directory, so you can manually check them for additional information such as date, time, revision, branch, etc...
|
| 127 |
+
|
| 128 |
+
### Create a cron job:
|
| 129 |
+
Create a cron job to run the tests daily and generate an html report. An example *cronjob* is available.
|
| 130 |
+
```bash
|
| 131 |
+
#!/bin/sh
|
| 132 |
+
cd /home/moses-speedtest/phrase_tables
|
| 133 |
+
|
| 134 |
+
python3 runtests.py -c testsuite_config #Run the tests.
|
| 135 |
+
python3 html_gen.py testsuite_config #Generate html
|
| 136 |
+
|
| 137 |
+
cp index.html /fs/thor4/html/www/speed-test/ #Update the html
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
Place the script in _/etc/cron.daily_ for daily testing.
|
| 141 |
+
|
| 142 |
+
###### Author
|
| 143 |
+
Nikolay Bogoychev, 2014
|
| 144 |
+
|
| 145 |
+
###### License
|
| 146 |
+
This software is licensed under the LGPL.
|
mosesdecoder/contrib/moses-speedtest/check_for_regression.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Checks if any of the latests tests has performed considerably different than
|
| 2 |
+
the previous ones. Takes the log directory as an argument."""
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
from testsuite_common import Result, processLogLine, bcolors, getLastTwoLines
|
| 6 |
+
|
| 7 |
+
LOGDIR = sys.argv[1] #Get the log directory as an argument
|
| 8 |
+
PERCENTAGE = 5 #Default value for how much a test shoudl change
|
| 9 |
+
if len(sys.argv) == 3:
|
| 10 |
+
PERCENTAGE = float(sys.argv[2]) #Default is 5%, but we can specify more
|
| 11 |
+
#line parameter
|
| 12 |
+
|
| 13 |
+
def printResults(regressed, better, unchanged, firsttime):
|
| 14 |
+
"""Pretty print the results in different colours"""
|
| 15 |
+
if regressed != []:
|
| 16 |
+
for item in regressed:
|
| 17 |
+
print(bcolors.RED + "REGRESSION! " + item.testname + " Was: "\
|
| 18 |
+
+ str(item.previous) + " Is: " + str(item.current) + " Change: "\
|
| 19 |
+
+ str(abs(item.percentage)) + "%. Revision: " + item.revision\
|
| 20 |
+
+ bcolors.ENDC)
|
| 21 |
+
print('\n')
|
| 22 |
+
if unchanged != []:
|
| 23 |
+
for item in unchanged:
|
| 24 |
+
print(bcolors.BLUE + "UNCHANGED: " + item.testname + " Revision: " +\
|
| 25 |
+
item.revision + bcolors.ENDC)
|
| 26 |
+
print('\n')
|
| 27 |
+
if better != []:
|
| 28 |
+
for item in better:
|
| 29 |
+
print(bcolors.GREEN + "IMPROVEMENT! " + item.testname + " Was: "\
|
| 30 |
+
+ str(item.previous) + " Is: " + str(item.current) + " Change: "\
|
| 31 |
+
+ str(abs(item.percentage)) + "%. Revision: " + item.revision\
|
| 32 |
+
+ bcolors.ENDC)
|
| 33 |
+
if firsttime != []:
|
| 34 |
+
for item in firsttime:
|
| 35 |
+
print(bcolors.PURPLE + "First time test! " + item.testname +\
|
| 36 |
+
" Took: " + str(item.real) + " seconds. Revision: " +\
|
| 37 |
+
item.revision + bcolors.ENDC)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
all_files = os.listdir(LOGDIR)
|
| 41 |
+
regressed = []
|
| 42 |
+
better = []
|
| 43 |
+
unchanged = []
|
| 44 |
+
firsttime = []
|
| 45 |
+
|
| 46 |
+
#Go through all log files and find which tests have performed better.
|
| 47 |
+
for logfile in all_files:
|
| 48 |
+
(line1, line2) = getLastTwoLines(logfile, LOGDIR)
|
| 49 |
+
log1 = processLogLine(line1)
|
| 50 |
+
if line2 == '\n': # Empty line, only one test ever run
|
| 51 |
+
firsttime.append(log1)
|
| 52 |
+
continue
|
| 53 |
+
log2 = processLogLine(line2)
|
| 54 |
+
res = Result(log1.testname, log1.real, log2.real, log2.revision,\
|
| 55 |
+
log2.branch, log1.revision, log1.branch)
|
| 56 |
+
if res.percentage < -PERCENTAGE:
|
| 57 |
+
regressed.append(res)
|
| 58 |
+
elif res.change > PERCENTAGE:
|
| 59 |
+
better.append(res)
|
| 60 |
+
else:
|
| 61 |
+
unchanged.append(res)
|
| 62 |
+
|
| 63 |
+
printResults(regressed, better, unchanged, firsttime)
|
mosesdecoder/contrib/moses-speedtest/cronjob
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
cd /home/moses-speedtest/phrase_tables
|
| 3 |
+
|
| 4 |
+
python3 runtests.py -c testsuite_config #Run the tests.
|
| 5 |
+
python3 html_gen.py testsuite_config #Generate html
|
| 6 |
+
|
| 7 |
+
cp index.html /fs/thor4/html/www/speed-test/ #Update the html
|
mosesdecoder/contrib/moses-speedtest/runtests.py
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Given a config file, runs tests"""
|
| 2 |
+
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
import time
|
| 5 |
+
import shutil
|
| 6 |
+
from argparse import ArgumentParser
|
| 7 |
+
from testsuite_common import processLogLine
|
| 8 |
+
|
| 9 |
+
def parse_cmd():
|
| 10 |
+
"""Parse the command line arguments"""
|
| 11 |
+
description = "A python based speedtest suite for moses."
|
| 12 |
+
parser = ArgumentParser(description=description)
|
| 13 |
+
parser.add_argument("-c", "--configfile", action="store",\
|
| 14 |
+
dest="configfile", required=True,\
|
| 15 |
+
help="Specify test config file")
|
| 16 |
+
parser.add_argument("-s", "--singletest", action="store",\
|
| 17 |
+
dest="singletestdir", default=None,\
|
| 18 |
+
help="Single test name directory. Specify directory name,\
|
| 19 |
+
not full path!")
|
| 20 |
+
parser.add_argument("-r", "--revision", action="store",\
|
| 21 |
+
dest="revision", default=None,\
|
| 22 |
+
help="Specify a specific revison for the test.")
|
| 23 |
+
parser.add_argument("-b", "--branch", action="store",\
|
| 24 |
+
dest="branch", default=None,\
|
| 25 |
+
help="Specify a branch for the test.")
|
| 26 |
+
|
| 27 |
+
arguments = parser.parse_args()
|
| 28 |
+
return arguments
|
| 29 |
+
|
| 30 |
+
def repoinit(testconfig, profiler=None):
|
| 31 |
+
"""Determines revision and sets up the repo. If given the profiler optional
|
| 32 |
+
argument, wil init the profiler repo instead of the default one."""
|
| 33 |
+
revision = ''
|
| 34 |
+
#Update the repo
|
| 35 |
+
if profiler == "gnu-profiler":
|
| 36 |
+
if testconfig.repo_prof is not None:
|
| 37 |
+
os.chdir(testconfig.repo_prof)
|
| 38 |
+
else:
|
| 39 |
+
raise ValueError('Profiling repo is not defined')
|
| 40 |
+
elif profiler == "google-profiler":
|
| 41 |
+
if testconfig.repo_gprof is not None:
|
| 42 |
+
os.chdir(testconfig.repo_gprof)
|
| 43 |
+
else:
|
| 44 |
+
raise ValueError('Profiling repo is not defined')
|
| 45 |
+
else:
|
| 46 |
+
os.chdir(testconfig.repo)
|
| 47 |
+
#Checkout specific branch, else maintain main branch
|
| 48 |
+
if testconfig.branch != 'master':
|
| 49 |
+
subprocess.call(['git', 'checkout', testconfig.branch])
|
| 50 |
+
rev, _ = subprocess.Popen(['git', 'rev-parse', 'HEAD'],\
|
| 51 |
+
stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
|
| 52 |
+
revision = str(rev).replace("\\n'", '').replace("b'", '')
|
| 53 |
+
else:
|
| 54 |
+
subprocess.call(['git checkout master'], shell=True)
|
| 55 |
+
|
| 56 |
+
#Check a specific revision. Else checkout master.
|
| 57 |
+
if testconfig.revision:
|
| 58 |
+
subprocess.call(['git', 'checkout', testconfig.revision])
|
| 59 |
+
revision = testconfig.revision
|
| 60 |
+
elif testconfig.branch == 'master':
|
| 61 |
+
subprocess.call(['git pull'], shell=True)
|
| 62 |
+
rev, _ = subprocess.Popen(['git rev-parse HEAD'], stdout=subprocess.PIPE,\
|
| 63 |
+
stderr=subprocess.PIPE, shell=True).communicate()
|
| 64 |
+
revision = str(rev).replace("\\n'", '').replace("b'", '')
|
| 65 |
+
|
| 66 |
+
return revision
|
| 67 |
+
|
| 68 |
+
class Configuration:
|
| 69 |
+
"""A simple class to hold all of the configuration constatns"""
|
| 70 |
+
def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None, repo_gprof=None):
|
| 71 |
+
self.repo = repo
|
| 72 |
+
self.repo_prof = repo_prof
|
| 73 |
+
self.repo_gprof = repo_gprof
|
| 74 |
+
self.drop_caches = drop_caches
|
| 75 |
+
self.tests = tests
|
| 76 |
+
self.testlogs = testlogs
|
| 77 |
+
self.basebranch = basebranch
|
| 78 |
+
self.baserev = baserev
|
| 79 |
+
self.singletest = None
|
| 80 |
+
self.revision = None
|
| 81 |
+
self.branch = 'master' # Default branch
|
| 82 |
+
|
| 83 |
+
def additional_args(self, singletest, revision, branch):
|
| 84 |
+
"""Additional configuration from command line arguments"""
|
| 85 |
+
self.singletest = singletest
|
| 86 |
+
if revision is not None:
|
| 87 |
+
self.revision = revision
|
| 88 |
+
if branch is not None:
|
| 89 |
+
self.branch = branch
|
| 90 |
+
|
| 91 |
+
def set_revision(self, revision):
|
| 92 |
+
"""Sets the current revision that is being tested"""
|
| 93 |
+
self.revision = revision
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class Test:
|
| 97 |
+
"""A simple class to contain all information about tests"""
|
| 98 |
+
def __init__(self, name, command, ldopts, permutations, prof_command=None, gprof_command=None):
|
| 99 |
+
self.name = name
|
| 100 |
+
self.command = command
|
| 101 |
+
self.prof_command = prof_command
|
| 102 |
+
self.gprof_command = gprof_command
|
| 103 |
+
self.ldopts = ldopts.replace(' ', '').split(',') #Not tested yet
|
| 104 |
+
self.permutations = permutations
|
| 105 |
+
|
| 106 |
+
def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None, moses_gprof_repo=None):
|
| 107 |
+
"""Parses the config file"""
|
| 108 |
+
command, ldopts, prof_command, gprof_command = '', '', None, None
|
| 109 |
+
permutations = []
|
| 110 |
+
fileopen = open(conffile, 'r')
|
| 111 |
+
for line in fileopen:
|
| 112 |
+
line = line.split('#')[0] # Discard comments
|
| 113 |
+
if line == '' or line == '\n':
|
| 114 |
+
continue # Discard lines with comments only and empty lines
|
| 115 |
+
opt, args = line.split(' ', 1) # Get arguments
|
| 116 |
+
|
| 117 |
+
if opt == 'Command:':
|
| 118 |
+
command = args.replace('\n', '')
|
| 119 |
+
if moses_prof_repo is not None: # Get optional command for profiling
|
| 120 |
+
prof_command = moses_prof_repo + '/bin/' + command
|
| 121 |
+
if moses_gprof_repo is not None: # Get optional command for google-perftools
|
| 122 |
+
gprof_command = moses_gprof_repo + '/bin/' + command
|
| 123 |
+
command = moses_repo + '/bin/' + command
|
| 124 |
+
elif opt == 'LDPRE:':
|
| 125 |
+
ldopts = args.replace('\n', '')
|
| 126 |
+
elif opt == 'Variants:':
|
| 127 |
+
permutations = args.replace('\n', '').replace(' ', '').split(',')
|
| 128 |
+
else:
|
| 129 |
+
raise ValueError('Unrecognized option ' + opt)
|
| 130 |
+
#We use the testdir as the name.
|
| 131 |
+
testcase = Test(testdir, command, ldopts, permutations, prof_command, gprof_command)
|
| 132 |
+
fileopen.close()
|
| 133 |
+
return testcase
|
| 134 |
+
|
| 135 |
+
def parse_testconfig(conffile):
|
| 136 |
+
"""Parses the config file for the whole testsuite."""
|
| 137 |
+
repo_path, drop_caches, tests_dir, testlog_dir = '', '', '', ''
|
| 138 |
+
basebranch, baserev, repo_prof_path, repo_gprof_path = '', '', None, None
|
| 139 |
+
fileopen = open(conffile, 'r')
|
| 140 |
+
for line in fileopen:
|
| 141 |
+
line = line.split('#')[0] # Discard comments
|
| 142 |
+
if line == '' or line == '\n':
|
| 143 |
+
continue # Discard lines with comments only and empty lines
|
| 144 |
+
opt, args = line.split(' ', 1) # Get arguments
|
| 145 |
+
if opt == 'MOSES_REPO_PATH:':
|
| 146 |
+
repo_path = args.replace('\n', '')
|
| 147 |
+
elif opt == 'DROP_CACHES_COMM:':
|
| 148 |
+
drop_caches = args.replace('\n', '')
|
| 149 |
+
elif opt == 'TEST_DIR:':
|
| 150 |
+
tests_dir = args.replace('\n', '')
|
| 151 |
+
elif opt == 'TEST_LOG_DIR:':
|
| 152 |
+
testlog_dir = args.replace('\n', '')
|
| 153 |
+
elif opt == 'BASEBRANCH:':
|
| 154 |
+
basebranch = args.replace('\n', '')
|
| 155 |
+
elif opt == 'BASEREV:':
|
| 156 |
+
baserev = args.replace('\n', '')
|
| 157 |
+
elif opt == 'MOSES_PROFILER_REPO:': # Optional
|
| 158 |
+
repo_prof_path = args.replace('\n', '')
|
| 159 |
+
elif opt == 'MOSES_GOOGLE_PROFILER_REPO:': # Optional
|
| 160 |
+
repo_gprof_path = args.replace('\n', '')
|
| 161 |
+
else:
|
| 162 |
+
raise ValueError('Unrecognized option ' + opt)
|
| 163 |
+
config = Configuration(repo_path, drop_caches, tests_dir, testlog_dir,\
|
| 164 |
+
basebranch, baserev, repo_prof_path, repo_gprof_path)
|
| 165 |
+
fileopen.close()
|
| 166 |
+
return config
|
| 167 |
+
|
| 168 |
+
def get_config():
|
| 169 |
+
"""Builds the config object with all necessary attributes"""
|
| 170 |
+
args = parse_cmd()
|
| 171 |
+
config = parse_testconfig(args.configfile)
|
| 172 |
+
config.additional_args(args.singletestdir, args.revision, args.branch)
|
| 173 |
+
revision = repoinit(config)
|
| 174 |
+
if config.repo_prof is not None:
|
| 175 |
+
repoinit(config, "gnu-profiler")
|
| 176 |
+
if config.repo_gprof is not None:
|
| 177 |
+
repoinit(config, "google-profiler")
|
| 178 |
+
config.set_revision(revision)
|
| 179 |
+
return config
|
| 180 |
+
|
| 181 |
+
def check_for_basever(testlogfile, basebranch):
|
| 182 |
+
"""Checks if the base revision is present in the testlogs"""
|
| 183 |
+
filetoopen = open(testlogfile, 'r')
|
| 184 |
+
for line in filetoopen:
|
| 185 |
+
templine = processLogLine(line)
|
| 186 |
+
if templine.branch == basebranch:
|
| 187 |
+
return True
|
| 188 |
+
return False
|
| 189 |
+
|
| 190 |
+
def split_time(filename):
|
| 191 |
+
"""Splits the output of the time function into seperate parts.
|
| 192 |
+
We will write time to file, because many programs output to
|
| 193 |
+
stderr which makes it difficult to get only the exact results we need."""
|
| 194 |
+
timefile = open(filename, 'r')
|
| 195 |
+
realtime = float(timefile.readline().replace('\n', '').split()[1])
|
| 196 |
+
usertime = float(timefile.readline().replace('\n', '').split()[1])
|
| 197 |
+
systime = float(timefile.readline().replace('\n', '').split()[1])
|
| 198 |
+
timefile.close()
|
| 199 |
+
|
| 200 |
+
return (realtime, usertime, systime)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def write_log(time_file, logname, config):
|
| 204 |
+
"""Writes to a logfile"""
|
| 205 |
+
log_write = open(config.testlogs + '/' + logname, 'a') # Open logfile
|
| 206 |
+
date_run = time.strftime("%d.%m.%Y %H:%M:%S") # Get the time of the test
|
| 207 |
+
realtime, usertime, systime = split_time(time_file) # Get the times in a nice form
|
| 208 |
+
|
| 209 |
+
# Append everything to a log file.
|
| 210 |
+
writestr = date_run + " " + config.revision + " Testname: " + logname +\
|
| 211 |
+
" RealTime: " + str(realtime) + " UserTime: " + str(usertime) +\
|
| 212 |
+
" SystemTime: " + str(systime) + " Branch: " + config.branch +'\n'
|
| 213 |
+
log_write.write(writestr)
|
| 214 |
+
log_write.close()
|
| 215 |
+
|
| 216 |
+
def write_gprof(command, name, variant, config):
|
| 217 |
+
"""Produces a gprof report from a gmon file"""
|
| 218 |
+
#Check if we have a directory for the profiling of this testcase:
|
| 219 |
+
output_dir = config.testlogs + '/' + name
|
| 220 |
+
if not os.path.exists(output_dir):
|
| 221 |
+
os.makedirs(output_dir)
|
| 222 |
+
outputfile = output_dir + '/' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant
|
| 223 |
+
|
| 224 |
+
#Compile a gprof command and output the file in the directory we just created
|
| 225 |
+
gmon_path = os.getcwd() + '/gmon.out' # Path to the profiling file
|
| 226 |
+
executable_path = command.split(' ')[0] # Path to the moses binary
|
| 227 |
+
gprof_command = 'gprof ' + executable_path + ' ' + gmon_path + ' > ' + outputfile
|
| 228 |
+
subprocess.call([gprof_command], shell=True)
|
| 229 |
+
os.remove(gmon_path) # After we are done discard the gmon file
|
| 230 |
+
|
| 231 |
+
def write_pprof(name, variant, config):
|
| 232 |
+
"""Copies the google-perftools profiler output to the corresponding test directory"""
|
| 233 |
+
output_dir = config.testlogs + '/' + name
|
| 234 |
+
if not os.path.exists(output_dir):
|
| 235 |
+
os.makedirs(output_dir)
|
| 236 |
+
outputfile = output_dir + '/pprof_' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant
|
| 237 |
+
shutil.move("/tmp/moses.prof", outputfile)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def execute_test(command, path, name, variant, config, profile=None):
|
| 241 |
+
"""Executes a testcase given a whole command, path to the test file output,
|
| 242 |
+
name of the test and variant tested. Config is the global configuration"""
|
| 243 |
+
subprocess.Popen([command], stdout=None, stderr=subprocess.PIPE, shell=True).communicate()
|
| 244 |
+
if profile is None:
|
| 245 |
+
write_log(path, name + '_' + variant, config)
|
| 246 |
+
elif profile == "gnu-profiler": # Basically produce a gmon output
|
| 247 |
+
write_gprof(command, name, variant, config)
|
| 248 |
+
elif profile == "google-profiler":
|
| 249 |
+
write_pprof(name, variant, config)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def execute_tests(testcase, cur_directory, config):
|
| 253 |
+
"""Executes timed tests based on the config file"""
|
| 254 |
+
#Several global commands related to the time wrapper
|
| 255 |
+
time_command = ' time -p -o /tmp/time_moses_tests '
|
| 256 |
+
time_path = '/tmp/time_moses_tests'
|
| 257 |
+
|
| 258 |
+
#Figure out the order of which tests must be executed.
|
| 259 |
+
#Change to the current test directory
|
| 260 |
+
os.chdir(config.tests + '/' + cur_directory)
|
| 261 |
+
#Clear caches
|
| 262 |
+
subprocess.call(['sync'], shell=True)
|
| 263 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 264 |
+
#Perform vanilla test and if a cached test exists - as well
|
| 265 |
+
print(testcase.name)
|
| 266 |
+
if 'vanilla' in testcase.permutations:
|
| 267 |
+
#Create the command for executing moses
|
| 268 |
+
whole_command = time_command + testcase.command
|
| 269 |
+
|
| 270 |
+
#test normal and cached
|
| 271 |
+
execute_test(whole_command, time_path, testcase.name, 'vanilla', config)
|
| 272 |
+
if 'cached' in testcase.permutations:
|
| 273 |
+
execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config)
|
| 274 |
+
|
| 275 |
+
#Now perform LD_PRELOAD tests
|
| 276 |
+
if 'ldpre' in testcase.permutations:
|
| 277 |
+
for opt in testcase.ldopts:
|
| 278 |
+
#Clear caches
|
| 279 |
+
subprocess.call(['sync'], shell=True)
|
| 280 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 281 |
+
|
| 282 |
+
#Create the command for executing moses:
|
| 283 |
+
whole_command = 'LD_PRELOAD=' + opt + time_command + testcase.command
|
| 284 |
+
variant = 'ldpre_' + opt
|
| 285 |
+
|
| 286 |
+
#test normal and cached
|
| 287 |
+
execute_test(whole_command, time_path, testcase.name, variant, config)
|
| 288 |
+
if 'cached' in testcase.permutations:
|
| 289 |
+
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config)
|
| 290 |
+
|
| 291 |
+
#Perform profiling test. Mostly same as the above lines but necessary duplication.
|
| 292 |
+
#All actual code is inside execute_test so those lines shouldn't need modifying
|
| 293 |
+
if 'profile' in testcase.permutations:
|
| 294 |
+
subprocess.call(['sync'], shell=True) # Drop caches first
|
| 295 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 296 |
+
|
| 297 |
+
if 'vanilla' in testcase.permutations:
|
| 298 |
+
whole_command = testcase.prof_command
|
| 299 |
+
execute_test(whole_command, time_path, testcase.name, 'profile', config, "gnu-profiler")
|
| 300 |
+
if 'cached' in testcase.permutations:
|
| 301 |
+
execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, "gnu-profiler")
|
| 302 |
+
|
| 303 |
+
if 'ldpre' in testcase.permutations:
|
| 304 |
+
for opt in testcase.ldopts:
|
| 305 |
+
#Clear caches
|
| 306 |
+
subprocess.call(['sync'], shell=True)
|
| 307 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 308 |
+
|
| 309 |
+
#Create the command for executing moses:
|
| 310 |
+
whole_command = 'LD_PRELOAD=' + opt + " " + testcase.prof_command
|
| 311 |
+
variant = 'profile_ldpre_' + opt
|
| 312 |
+
|
| 313 |
+
#test normal and cached
|
| 314 |
+
execute_test(whole_command, time_path, testcase.name, variant, config, "gnu-profiler")
|
| 315 |
+
if 'cached' in testcase.permutations:
|
| 316 |
+
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, "gnu-profiler")
|
| 317 |
+
|
| 318 |
+
#Google-perftools profiler
|
| 319 |
+
if 'google-profiler' in testcase.permutations:
|
| 320 |
+
subprocess.call(['sync'], shell=True) # Drop caches first
|
| 321 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 322 |
+
|
| 323 |
+
#Create the command for executing moses
|
| 324 |
+
whole_command = "CPUPROFILE=/tmp/moses.prof " + testcase.gprof_command
|
| 325 |
+
|
| 326 |
+
#test normal and cached
|
| 327 |
+
execute_test(whole_command, time_path, testcase.name, 'vanilla', config, 'google-profiler')
|
| 328 |
+
if 'cached' in testcase.permutations:
|
| 329 |
+
execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config, 'google-profiler')
|
| 330 |
+
|
| 331 |
+
#Now perform LD_PRELOAD tests
|
| 332 |
+
if 'ldpre' in testcase.permutations:
|
| 333 |
+
for opt in testcase.ldopts:
|
| 334 |
+
#Clear caches
|
| 335 |
+
subprocess.call(['sync'], shell=True)
|
| 336 |
+
subprocess.call([config.drop_caches], shell=True)
|
| 337 |
+
|
| 338 |
+
#Create the command for executing moses:
|
| 339 |
+
whole_command = 'LD_PRELOAD=' + opt + " " + whole_command
|
| 340 |
+
variant = 'ldpre_' + opt
|
| 341 |
+
|
| 342 |
+
#test normal and cached
|
| 343 |
+
execute_test(whole_command, time_path, testcase.name, variant, config, 'google-profiler')
|
| 344 |
+
if 'cached' in testcase.permutations:
|
| 345 |
+
execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, 'google-profiler')
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
# Go through all the test directories and executes tests
|
| 349 |
+
if __name__ == '__main__':
|
| 350 |
+
CONFIG = get_config()
|
| 351 |
+
ALL_DIR = os.listdir(CONFIG.tests)
|
| 352 |
+
|
| 353 |
+
#We should first check if any of the tests is run for the first time.
|
| 354 |
+
#If some of them are run for the first time we should first get their
|
| 355 |
+
#time with the base version (usually the previous release)
|
| 356 |
+
FIRSTTIME = []
|
| 357 |
+
TESTLOGS = []
|
| 358 |
+
#Strip filenames of test underscores
|
| 359 |
+
for listline in os.listdir(CONFIG.testlogs):
|
| 360 |
+
listline = listline.replace('_vanilla', '')
|
| 361 |
+
listline = listline.replace('_cached', '')
|
| 362 |
+
listline = listline.replace('_ldpre', '')
|
| 363 |
+
TESTLOGS.append(listline)
|
| 364 |
+
for directory in ALL_DIR:
|
| 365 |
+
if directory not in TESTLOGS:
|
| 366 |
+
FIRSTTIME.append(directory)
|
| 367 |
+
|
| 368 |
+
#Sometimes even though we have the log files, we will need to rerun them
|
| 369 |
+
#Against a base version, because we require a different baseversion (for
|
| 370 |
+
#example when a new version of Moses is released.) Therefore we should
|
| 371 |
+
#Check if the version of Moses that we have as a base version is in all
|
| 372 |
+
#of the log files.
|
| 373 |
+
|
| 374 |
+
for logfile in os.listdir(CONFIG.testlogs):
|
| 375 |
+
logfile_name = CONFIG.testlogs + '/' + logfile
|
| 376 |
+
if os.path.isfile(logfile_name) and not check_for_basever(logfile_name, CONFIG.basebranch):
|
| 377 |
+
logfile = logfile.replace('_vanilla', '')
|
| 378 |
+
logfile = logfile.replace('_cached', '')
|
| 379 |
+
logfile = logfile.replace('_ldpre', '')
|
| 380 |
+
FIRSTTIME.append(logfile)
|
| 381 |
+
FIRSTTIME = list(set(FIRSTTIME)) #Deduplicate
|
| 382 |
+
|
| 383 |
+
if FIRSTTIME != []:
|
| 384 |
+
#Create a new configuration for base version tests:
|
| 385 |
+
BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
|
| 386 |
+
CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
|
| 387 |
+
CONFIG.baserev, CONFIG.repo_prof, CONFIG.repo_gprof)
|
| 388 |
+
BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
|
| 389 |
+
#Set up the repository and get its revision:
|
| 390 |
+
REVISION = repoinit(BASECONFIG)
|
| 391 |
+
BASECONFIG.set_revision(REVISION)
|
| 392 |
+
#Build
|
| 393 |
+
os.chdir(BASECONFIG.repo)
|
| 394 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 395 |
+
#If profiler configuration exists also init it
|
| 396 |
+
if BASECONFIG.repo_prof is not None:
|
| 397 |
+
repoinit(BASECONFIG, "gnu-profiler")
|
| 398 |
+
os.chdir(BASECONFIG.repo_prof)
|
| 399 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 400 |
+
|
| 401 |
+
if BASECONFIG.repo_gprof is not None:
|
| 402 |
+
repoinit(BASECONFIG, "google-profiler")
|
| 403 |
+
os.chdir(BASECONFIG.repo_gprof)
|
| 404 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 405 |
+
|
| 406 |
+
#Perform tests
|
| 407 |
+
for directory in FIRSTTIME:
|
| 408 |
+
cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\
|
| 409 |
+
'/config', directory, BASECONFIG.repo, BASECONFIG.repo_prof, BASECONFIG.repo_gprof)
|
| 410 |
+
execute_tests(cur_testcase, directory, BASECONFIG)
|
| 411 |
+
|
| 412 |
+
#Reset back the repository to the normal configuration
|
| 413 |
+
repoinit(CONFIG)
|
| 414 |
+
if BASECONFIG.repo_prof is not None:
|
| 415 |
+
repoinit(CONFIG, "gnu-profiler")
|
| 416 |
+
|
| 417 |
+
if BASECONFIG.repo_gprof is not None:
|
| 418 |
+
repoinit(CONFIG, "google-profiler")
|
| 419 |
+
|
| 420 |
+
#Builds moses
|
| 421 |
+
os.chdir(CONFIG.repo)
|
| 422 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 423 |
+
if CONFIG.repo_prof is not None:
|
| 424 |
+
os.chdir(CONFIG.repo_prof)
|
| 425 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 426 |
+
|
| 427 |
+
if CONFIG.repo_gprof is not None:
|
| 428 |
+
os.chdir(CONFIG.repo_gprof)
|
| 429 |
+
subprocess.call(['./previous.sh'], shell=True)
|
| 430 |
+
|
| 431 |
+
if CONFIG.singletest:
|
| 432 |
+
TESTCASE = parse_configfile(CONFIG.tests + '/' +\
|
| 433 |
+
CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
|
| 434 |
+
execute_tests(TESTCASE, CONFIG.singletest, CONFIG)
|
| 435 |
+
else:
|
| 436 |
+
for directory in ALL_DIR:
|
| 437 |
+
cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\
|
| 438 |
+
'/config', directory, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
|
| 439 |
+
execute_tests(cur_testcase, directory, CONFIG)
|
mosesdecoder/contrib/moses-speedtest/sys_drop_caches.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/spython
|
| 2 |
+
from sys import argv, stderr, exit
|
| 3 |
+
from os import linesep as ls
|
| 4 |
+
procfile = "/proc/sys/vm/drop_caches"
|
| 5 |
+
options = ["1","2","3"]
|
| 6 |
+
flush_type = None
|
| 7 |
+
try:
|
| 8 |
+
flush_type = argv[1][0:1]
|
| 9 |
+
if not flush_type in options:
|
| 10 |
+
raise IndexError, "not in options"
|
| 11 |
+
with open(procfile, "w") as f:
|
| 12 |
+
f.write("%s%s" % (flush_type,ls))
|
| 13 |
+
exit(0)
|
| 14 |
+
except IndexError, e:
|
| 15 |
+
stderr.write("Argument %s required.%s" % (options, ls))
|
| 16 |
+
except IOError, e:
|
| 17 |
+
stderr.write("Error writing to file.%s" % ls)
|
| 18 |
+
except StandardError, e:
|
| 19 |
+
stderr.write("Unknown Error.%s" % ls)
|
| 20 |
+
|
| 21 |
+
exit(1)
|
| 22 |
+
|
mosesdecoder/contrib/moses-speedtest/test_config
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
|
| 2 |
+
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
|
| 3 |
+
Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla
|
mosesdecoder/contrib/moses-speedtest/testsuite_config
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
|
| 2 |
+
DROP_CACHES_COMM: sys_drop_caches 3
|
| 3 |
+
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
|
| 4 |
+
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
|
| 5 |
+
BASEBRANCH: RELEASE-2.1.1
|
mosesdecoder/contrib/picaro/README
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README - 16 Jan 2011b
|
| 2 |
+
Author: Jason Riesa <jason.riesa@gmail.com>
|
| 3 |
+
|
| 4 |
+
Picaro [v1.0]: A simple command-line alignment visualization tool.
|
| 5 |
+
Visualize alignments in grid-format.
|
| 6 |
+
|
| 7 |
+
This brief README is organized as follows:
|
| 8 |
+
I. REQUIREMENTS
|
| 9 |
+
II. USAGE
|
| 10 |
+
III. INPUT FORMAT
|
| 11 |
+
IV. EXAMPLE USAGE
|
| 12 |
+
V. NOTES
|
| 13 |
+
|
| 14 |
+
I. REQUIREMENTS
|
| 15 |
+
===============
|
| 16 |
+
Python v2.5 or higher is required.
|
| 17 |
+
|
| 18 |
+
II. USAGE
|
| 19 |
+
=========
|
| 20 |
+
Picaro takes as input 3 mandatory arguments and up to 2 optional arguments:
|
| 21 |
+
Mandatory arguments:
|
| 22 |
+
1. -a1 <alignment1> where alignment1 is a path to an alignment file
|
| 23 |
+
2. -e <e> where e is a path to a file of English sentences
|
| 24 |
+
3. -f <f> where f is a path to a file of French sentences
|
| 25 |
+
Optional arguments:
|
| 26 |
+
1. -a2 <a2> path to alignment2 file in f-e format
|
| 27 |
+
2. -maxlen <len> for each sentence pair, render only when each
|
| 28 |
+
sentence has length in words <= len
|
| 29 |
+
|
| 30 |
+
For historical reasons we use the labels e, f, English, and French,
|
| 31 |
+
but any language pair will do.
|
| 32 |
+
|
| 33 |
+
III. INPUT FORMAT
|
| 34 |
+
=================
|
| 35 |
+
- Files e and f must be sentence-aligned
|
| 36 |
+
- Alignment files must be in f-e format
|
| 37 |
+
See included sample files in zh/ and es/.
|
| 38 |
+
|
| 39 |
+
IV. EXAMPLE USAGE
|
| 40 |
+
=================
|
| 41 |
+
WITH A SINGLE ALIGNMENT:
|
| 42 |
+
$ picaro.py -e zh/sample.e -f zh/sample.f -a1 zh/sample.aln
|
| 43 |
+
|
| 44 |
+
COMPARING TWO ALIGNMENTS:
|
| 45 |
+
$ picaro.py -e zh/sample.e -f zh/sample.f -a1 zh/alternate.aln -a2 zh/sample.aln
|
| 46 |
+
|
| 47 |
+
When visualizing two alignments at once, refer to the following color scheme:
|
| 48 |
+
Green blocks: alignments a1 and a2 agree
|
| 49 |
+
Blue blocks: alignment a1 only
|
| 50 |
+
Gold blocks: alignment a2 only
|
| 51 |
+
|
| 52 |
+
V. NOTES
|
| 53 |
+
========
|
| 54 |
+
RIGHT-TO-LEFT TEXT:
|
| 55 |
+
If you are using right-to-left text, e.g. Arabic, transliterate your text first.
|
| 56 |
+
Terminals generally render unexpectedly with mixed left-to-right and right-to-left text.
|
| 57 |
+
For Arabic, in particular, we use the Buckwalter translitation scheme [1] when using this tool.
|
| 58 |
+
The following Perl module implements Buckwalter transliteration:
|
| 59 |
+
http://search.cpan.org/~smrz/Encode-Arabic-1.8/lib/Encode/Arabic.pm
|
| 60 |
+
|
| 61 |
+
[1] http://www.ldc.upenn.edu/myl/morph/buckwalter.html
|
| 62 |
+
|
mosesdecoder/contrib/picaro/es/README
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Spanish-English sample
|
| 2 |
+
sample.f Spanish text
|
| 3 |
+
sample.e English text
|
| 4 |
+
sample.a Alignment file with links in f-e format
|
mosesdecoder/contrib/picaro/es/sample.aln
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0-0 0-1 1-2 1-3 2-4 3-5 4-6 5-7
|
mosesdecoder/contrib/picaro/es/sample.e
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
i want to go to spain tomorrow .
|
mosesdecoder/contrib/picaro/es/sample.f
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
quiero ir a españa mañana .
|
mosesdecoder/contrib/picaro/picaro.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# Picaro: An simple command-line alignment visualization tool.
|
| 4 |
+
#
|
| 5 |
+
# picaro.py
|
| 6 |
+
# Visualize alignments between sentences in a grid format.
|
| 7 |
+
#
|
| 8 |
+
# Jason Riesa <riesa@isi.edu>
|
| 9 |
+
# version: 01-16-2010
|
| 10 |
+
#
|
| 11 |
+
# Copyright (C) 2013 Jason Riesa
|
| 12 |
+
#
|
| 13 |
+
# This library is free software; you can redistribute it and/or
|
| 14 |
+
# modify it under the terms of the GNU Lesser General Public
|
| 15 |
+
# License as published by the Free Software Foundation; either
|
| 16 |
+
# version 2.1 of the License, or (at your option) any later version.
|
| 17 |
+
#
|
| 18 |
+
# This library is distributed in the hope that it will be useful,
|
| 19 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 20 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 21 |
+
# Lesser General Public License for more details.
|
| 22 |
+
#
|
| 23 |
+
# You should have received a copy of the GNU Lesser General Public
|
| 24 |
+
# License along with this library; if not, write to the Free Software
|
| 25 |
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 26 |
+
|
| 27 |
+
import sys, os, commands
|
| 28 |
+
from collections import defaultdict
|
| 29 |
+
|
| 30 |
+
# Path of the external transitive-closure binary. Disabled by default;
# it is only consulted if the (currently commented-out) -tc option is
# re-enabled below.
#TC_BIN = "tc/tc.linux32"

# Input paths, filled in by the command-line parser below.
a1_file_str = a2_file_str = ""
f_file_str = e_file_str = ""

# Transitive-closure display flags; both stay 0 because the -tc option
# parsing is commented out.
SHOW_TC_A1 = 0
SHOW_TC_A2 = 0

# Sentence pairs longer than this (on either side) are skipped.
# Defaults to "no limit".
maxlen = float('inf')
|
| 39 |
+
|
| 40 |
+
# Process command line options
try:
    # Consume sys.argv pairwise: each flag is followed by its value, and
    # both are deleted as they are read, so the loop drains the whole
    # argument list.
    while len(sys.argv) > 1:
        option = sys.argv[1]; del sys.argv[1]
        if option == '-a1':
            # Mandatory: first alignment file, one "i-j" link list per line
            a1_file_str = sys.argv[1]; del sys.argv[1]
        elif option == '-a2':
            # Optional: second alignment file to compare against the first
            a2_file_str = sys.argv[1]; del sys.argv[1]
        elif option == '-f':
            # Mandatory: source-side (f) text, one sentence per line
            f_file_str = sys.argv[1]; del sys.argv[1]
        elif option == '-e':
            # Mandatory: target-side (e) text, one sentence per line
            e_file_str = sys.argv[1]; del sys.argv[1]
        elif option == '-maxlen':
            # Optional: only draw grids for sentences up to this length
            maxlen = int(sys.argv[1]); del sys.argv[1]
        else:
            sys.stderr.write("Invalid option: %s\n" % (option))
            sys.exit(1)
        # Disabled -tc (transitive closure) option, kept as a string
        # literal for reference; re-enabling it requires TC_BIN above.
        '''
        elif option == '-tc':
            if sys.argv[1] == '1':
                SHOW_TC_A1 = 1; del sys.argv[1]
            elif sys.argv[1] == '2':
                SHOW_TC_A2 = 2; del sys.argv[1]
            else:
                raise Exception, "Invalid argument to option -tc"
        '''

    # -a1, -f and -e are all required; -a2 and -maxlen are optional.
    if a1_file_str == "" or f_file_str == "" or e_file_str == "":
        raise Exception, "Not all options properly specified."
    # Make sure transitive closure binary exists if user has enabled this option
    # NOTE(review): TC_BIN is commented out at the top of the file, so if
    # SHOW_TC_A1/SHOW_TC_A2 were ever set this would raise NameError, not
    # the intended Exception — confirm before re-enabling -tc.
    if SHOW_TC_A1 or SHOW_TC_A2:
        if not os.path.exists(TC_BIN):
            raise Exception, "Transitive closure binary "+TC_BIN+" not found."
except Exception, msg:
    # Any parse/validation failure: report the reason, print usage, quit.
    sys.stderr.write("%s: %s\n" % (sys.argv[0], msg))
    sys.stderr.write("Usage: %s: -a1 <alignment1> -f <f> -e <e> [-a2 <alignment2>]\n" % (sys.argv[0]))
    sys.stderr.write("Mandatory arguments:\n")
    sys.stderr.write(" -a1 <a1>\t path to alignment 1 file in f-e format\n")
    sys.stderr.write(" -f <f>\t\t path to source text f\n")
    sys.stderr.write(" -e <e>\t\t path to target text e\n")
    sys.stderr.write("Optional arguments:\n")
    sys.stderr.write(" -a2 <a2>\t path to alignment 2 file in f-e format\n")
    sys.stderr.write(" -maxlen <len>\t display alignment only when e and f have length <= len\n")
    sys.exit(1)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# Open the three mandatory input streams; the comparison alignment file
# is only opened when -a2 was supplied on the command line.
a_file = open(a1_file_str, 'r')
f_file = open(f_file_str, 'r')
e_file = open(e_file_str, 'r')
if a2_file_str:
    a2_file = open(a2_file_str, 'r')

# sentenceNumber: 1-based index of the sentence pair currently read.
# nextRequested: the next sentence number the user asked to display;
# sentences below it are read but not rendered.
sentenceNumber = 0
nextRequested = 1
|
| 94 |
+
# Main loop: iterate over the alignment file, reading the parallel e/f
# (and optional a2) lines in lockstep, and render one grid per sentence.
for aline in a_file:
    eline = e_file.readline()
    fline = f_file.readline()
    if a2_file_str != "":
        a2line = a2_file.readline()

    # Tokenize: links are whitespace-separated "i-j" pairs, words are
    # whitespace-separated tokens.
    links = aline.split()
    e_words = eline.split()
    f_words = fline.split()
    if a2_file_str != "":
        links2 = a2line.split()

    # Get transitive closure of links and links2
    # (dead by default: SHOW_TC_A1/SHOW_TC_A2 are 0 and TC_BIN is
    # commented out at the top of the file)
    if SHOW_TC_A1:
        cmd = 'echo "' + ' '.join(links) + '" | ' + TC_BIN
        failure1, output1 = commands.getstatusoutput(cmd)
        tc1 = output1.split()
    if SHOW_TC_A2:
        cmd = 'echo "' + ' '.join(links2) + '" | ' + TC_BIN
        failure2, output2 = commands.getstatusoutput(cmd)
        tc2 = output2.split()

    # Update tracking counts; skip forward silently until we reach the
    # sentence the user asked for.
    sentenceNumber += 1
    if sentenceNumber < nextRequested:
        continue

    # Don't generate alignment grids for very large sentences
    if len(e_words) > maxlen or len(f_words) > maxlen:
        continue

    print "== SENTENCE ",sentenceNumber," =="

    # Initialize alignment objects
    # a holds alignments of user-specified -a1 <file>
    # a2 holds alignments of user-specified -a2 <file>
    # Cell values: 0 = no link, 1 = real link, 2 = link added by
    # transitive closure.
    a = defaultdict(lambda: defaultdict(int))
    a2 = defaultdict(lambda: defaultdict(int))

    # Print e_words on the columns
    # First, find the length of the longest word
    # NOTE(review): longestEWord is tracked but never used afterwards.
    longestEWordSize = 0
    longestEWord = 0
    for w in e_words:
        if len(w) > longestEWordSize:
            longestEWordSize = len(w)
            longestEWord = w

    # Now, print the e-words vertically, one character per row: row i
    # prints w[-i], so every word's last character lands on the bottom
    # row and shorter words are padded with spaces above.
    for i in range(longestEWordSize, 0, -1):
        for w in e_words:
            if len(w) < i:
                print " ",
            else:
                print w[(i*-1)],
        print

    # Fill in alignment matrix 1
    # (the int() wrappers are redundant: i, j already come from map(int, ...))
    for link in links:
        i, j = map(int, link.split('-'))
        a[int(i)][int(j)] = 1
    # Fill in extra links added by transitive closure
    if SHOW_TC_A1:
        for link in tc1:
            i, j = map(int, link.split('-'))
            if(a[i][j] != 1):
                a[i][j] = 2

    # Fill in alignment matrix 2
    if(a2_file_str != ""):
        for link in links2:
            i, j = map(int, link.split('-'))
            a2[i][j] = 1
        # Fill in extra links added by transitive closure
        if SHOW_TC_A2:
            for link in tc2:
                i, j = map(int, link.split('-'))
                if(a2[i][j] != 1):
                    a2[i][j] = 2

    # Print filled-in alignment matrix: rows are f-words, columns are
    # e-words. Cells are drawn with ANSI SGR escapes (44 = blue bg,
    # 42 = green bg, 43 = yellow bg).
    if a2_file_str == "":
        # Single-alignment view.
        for i, _ in enumerate(f_words):
            for j, _ in enumerate(e_words):
                val1 = a[i][j]
                if val1 == 0:
                    # No link
                    print ':',
                elif val1 == 1:
                    # Regular link
                    print u'\u001b[44m\u0020\u001b[0m',
                elif val1 == 2:
                    # Link due to transitive closure
                    # Render as gray-shaded square
                    print 'O',
            print f_words[i]
        print
    else:
        # Comparison view: green = in both, blue = only a1, yellow =
        # only a2; digits mark links introduced by transitive closure.
        for i, _ in enumerate(f_words):
            for j, _ in enumerate(e_words):
                val1 = a[i][j]
                val2 = a2[i][j]

                if val1 == 0 and val2 == 0:
                    # Link not in a nor a2
                    # Empty grid box
                    print ':',
                # Link in both a and a2
                elif val1 > 0 and val2 > 0:
                    # Green box
                    if val1 == 1:
                        if val2 == 1:
                            print u'\u001b[42m\u001b[1m\u0020\u001b[0m',
                        elif val2 == 2:
                            print u'\u001b[42m\u001b[30m2\u001b[0m',
                    elif val1 == 2:
                        if val2 == 1:
                            print u'\u001b[42m\u0020\u001b[0m',
                        elif val2 == 2:
                            print u'\u001b[42m\u001b[30m3\u001b[0m',
                # Link in a2, but not a
                elif val1 == 0 and val2 > 0:
                    if val2 == 1:
                        # Yellow box
                        print u'\u001b[1m\u001b[43m\u0020\u001b[0m',
                    elif val2 == 2:
                        # Artificial link by transitive closure
                        print u'\u001b[43m\u001b[30m2\u001b[0m',

                # Link in a, but not a2
                elif val1 > 0 and val2 == 0:
                    if val1 == 1:
                        # Blue box
                        print u'\u001b[1m\u001b[44m\u0020\u001b[0m',
                    elif val1 == 2:
                        print u'\u001b[44m\u001b[37m1\u001b[0m',
            print f_words[i]
    # Interactive prompt: empty input advances to the next sentence, a
    # leading 'q' (or "quit") exits, anything else is parsed as the next
    # sentence number to jump to.
    nextDefault = sentenceNumber + 1
    sys.stdout.write("Enter next alignment number or 'q' to quit [%d]: " %(nextDefault))
    user_input = sys.stdin.readline().strip()
    if user_input == "":
        nextRequested = nextDefault
    elif user_input[0] == "q" or user_input == "quit":
        sys.exit(1)
    else:
        try:
            nextRequested = int(user_input)
        except:
            # Unparsable input: fall back to the next sentence.
            nextRequested = sentenceNumber + 1
            sys.stdout.write("Unknown alignment id: %s\nContinuing with %d.\n" %(user_input, nextRequested))
|
| 246 |
+
|
| 247 |
+
# Release the input streams in the order they were opened.
# NOTE(review): a2_file (when -a2 was given) is never closed explicitly.
for handle in (a_file, e_file, f_file):
    handle.close()
|
| 250 |
+
|
mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.idx
ADDED
|
Binary file (68 Bytes). View file
|
|
|
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.idx
ADDED
|
Binary file (76 Bytes). View file
|
|
|
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.srctree.wa
ADDED
|
Binary file (728 Bytes). View file
|
|
|