Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- FacebookAI/xlm-roberta-large/onnx/tokenizer.json +3 -0
- data/de2en/0.6/trainable_data/valid.de-en.de.idx +0 -0
- data/de2en/0.6/trainable_data/valid.de-en.en.idx +0 -0
- data/test/raw/de2en/test.de2en.flores.de +0 -0
- data/test/raw/de2en/test.de2en.flores.en +0 -0
- data/test/raw/de2en/test.de2en.wmt22.de +0 -0
- data/test/raw/de2en/test.de2en.wmt22.en +0 -0
- data/test/raw/de2en/test.de2en.wmt23.de +0 -0
- data/test/raw/de2en/test.de2en.wmt23.en +0 -0
- data/test/raw/en2de/test.en2de.flores.de +0 -0
- data/test/raw/en2de/test.en2de.flores.en +0 -0
- data/test/raw/en2de/test.en2de.wmt22.de +0 -0
- data/test/raw/en2de/test.en2de.wmt22.en +0 -0
- data/test/raw/en2de/test.en2de.wmt23.de +0 -0
- data/test/raw/en2de/test.en2de.wmt23.en +0 -0
- data/test/raw/en2zh/test.en2zh.flores.en +0 -0
- data/test/raw/en2zh/test.en2zh.flores.zh +0 -0
- data/test/raw/en2zh/test.en2zh.wmt22.en +0 -0
- data/test/raw/en2zh/test.en2zh.wmt22.zh +0 -0
- data/test/raw/en2zh/test.en2zh.wmt23.en +0 -0
- data/test/raw/en2zh/test.en2zh.wmt23.zh +0 -0
- data/test/raw/zh2en/test.zh2en.flores.en +0 -0
- data/test/raw/zh2en/test.zh2en.flores.zh +0 -0
- data/test/raw/zh2en/test.zh2en.wmt22.en +0 -0
- data/test/raw/zh2en/test.zh2en.wmt22.zh +0 -0
- data/test/raw/zh2en/test.zh2en.wmt23.en +0 -0
- data/test/raw/zh2en/test.zh2en.wmt23.zh +0 -0
- data/test/trainable_data/de2en/dict.de.txt +0 -0
- data/test/trainable_data/de2en/dict.en.txt +0 -0
- data/test/trainable_data/de2en/test.de-en.de.idx +0 -0
- data/test/trainable_data/de2en/test.de-en.en.idx +0 -0
- data/test/trainable_data/en2de/dict.de.txt +0 -0
- data/test/trainable_data/en2de/dict.en.txt +0 -0
- data/test/trainable_data/en2de/preprocess.log +12 -0
- data/test/trainable_data/en2de/test.en-de.de.idx +0 -0
- data/test/trainable_data/en2de/test1.en-de.de.idx +0 -0
- data/test/trainable_data/en2de/test1.en-de.en.idx +0 -0
- data/test/trainable_data/zh2en/preprocess2.log +6 -0
- data/test/trainable_data/zh2en/test.zh-en.zh.idx +0 -0
- data/test/trainable_data/zh2en/test1.zh-en.zh.idx +0 -0
- mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa +3 -0
- mosesdecoder/cruise-control/README +34 -0
- mosesdecoder/cruise-control/config.ems +473 -0
- mosesdecoder/cruise-control/create-binary.perl +55 -0
- mosesdecoder/cruise-control/example.config +25 -0
- mosesdecoder/cruise-control/shorten_info.pl +9 -0
- mosesdecoder/cruise-control/test_all_new_commits.sh +220 -0
- mosesdecoder/cruise-control/web/html_templates.php +112 -0
- mosesdecoder/cruise-control/web/index.php +101 -0
.gitattributes
CHANGED
|
@@ -39,3 +39,5 @@ fairseq-0.10.2/fairseq/data/token_block_utils_fast.cpython-310-x86_64-linux-gnu.
|
|
| 39 |
fairseq-0.10.2/docs/fairseq.gif filter=lfs diff=lfs merge=lfs -text
|
| 40 |
mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
|
| 41 |
mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 39 |
fairseq-0.10.2/docs/fairseq.gif filter=lfs diff=lfs merge=lfs -text
|
| 40 |
mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
|
| 41 |
mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
FacebookAI/xlm-roberta-large/onnx/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62c24cdc13d4c9952d63718d6c9fa4c287974249e16b7ade6d5a85e7bbb75626
|
| 3 |
+
size 17082660
|
data/de2en/0.6/trainable_data/valid.de-en.de.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/de2en/0.6/trainable_data/valid.de-en.en.idx
ADDED
|
Binary file (24 kB). View file
|
|
|
data/test/raw/de2en/test.de2en.flores.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/de2en/test.de2en.flores.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/de2en/test.de2en.wmt22.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/de2en/test.de2en.wmt22.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/de2en/test.de2en.wmt23.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/de2en/test.de2en.wmt23.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.flores.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.flores.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.wmt22.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.wmt22.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.wmt23.de
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2de/test.en2de.wmt23.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.flores.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.flores.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.wmt22.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.wmt22.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.wmt23.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/en2zh/test.en2zh.wmt23.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.flores.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.flores.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.wmt22.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.wmt22.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.wmt23.en
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/raw/zh2en/test.zh2en.wmt23.zh
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/trainable_data/de2en/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/trainable_data/de2en/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/trainable_data/de2en/test.de-en.de.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
data/test/trainable_data/de2en/test.de-en.en.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
data/test/trainable_data/en2de/dict.de.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/trainable_data/en2de/dict.en.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/test/trainable_data/en2de/preprocess.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[en] Dictionary: 47776 types
|
| 3 |
+
[en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23.en: 557 sents, 47662 tokens, 0.0% replaced by <unk>
|
| 4 |
+
[de] Dictionary: 47776 types
|
| 5 |
+
[de] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23.de: 557 sents, 51978 tokens, 0.0% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de
|
| 7 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 8 |
+
[en] Dictionary: 47776 types
|
| 9 |
+
[en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22.en: 2037 sents, 46796 tokens, 0.00641% replaced by <unk>
|
| 10 |
+
[de] Dictionary: 47776 types
|
| 11 |
+
[de] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22.de: 2037 sents, 51454 tokens, 0.00389% replaced by <unk>
|
| 12 |
+
Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de
|
data/test/trainable_data/en2de/test.en-de.de.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
data/test/trainable_data/en2de/test1.en-de.de.idx
ADDED
|
Binary file (24.5 kB). View file
|
|
|
data/test/trainable_data/en2de/test1.en-de.en.idx
ADDED
|
Binary file (24.5 kB). View file
|
|
|
data/test/trainable_data/zh2en/preprocess2.log
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='zh', target_lang='en', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/zh2en2', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/en-zh/wmt23/trainable_data/dict.en.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/en-zh/wmt23/trainable_data/dict.zh.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
|
| 2 |
+
[zh] Dictionary: 60432 types
|
| 3 |
+
[zh] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23.zh: 1976 sents, 52111 tokens, 0.00192% replaced by <unk>
|
| 4 |
+
[en] Dictionary: 46040 types
|
| 5 |
+
[en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23.en: 1976 sents, 61608 tokens, 0.00162% replaced by <unk>
|
| 6 |
+
Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/zh2en2
|
data/test/trainable_data/zh2en/test.zh-en.zh.idx
ADDED
|
Binary file (12.2 kB). View file
|
|
|
data/test/trainable_data/zh2en/test1.zh-en.zh.idx
ADDED
|
Binary file (22.5 kB). View file
|
|
|
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26f35f2fa1260b517cba96ce9a3df1890286d644ab99d975d85f5b06eae6a4d5
|
| 3 |
+
size 720173
|
mosesdecoder/cruise-control/README
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
A simple regular testing of Moses codebase, aka cruise control
|
| 2 |
+
--------------------------------------------------------------
|
| 3 |
+
|
| 4 |
+
Features:
|
| 5 |
+
|
| 6 |
+
- Checkout latest commits
|
| 7 |
+
- Test configure, make, make scripts
|
| 8 |
+
- Run regression tests
|
| 9 |
+
- Run a sample EMS pipeline
|
| 10 |
+
- Report results into logfiles
|
| 11 |
+
- A simple web interface in PHP
|
| 12 |
+
|
| 13 |
+
How to run cruise control:
|
| 14 |
+
|
| 15 |
+
1) Download and compile SRILM
|
| 16 |
+
|
| 17 |
+
2) Download, compile and install IRSTLM
|
| 18 |
+
|
| 19 |
+
3) Edit the configuration file example.config. You can create as many
|
| 20 |
+
configuration files as you like.
|
| 21 |
+
|
| 22 |
+
4) Execute ./test_all_new_commits.sh yourfile.config
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
How to set up the web interface:
|
| 26 |
+
|
| 27 |
+
1) Install Apache and PHP
|
| 28 |
+
|
| 29 |
+
2) Copy files from cruise-control/web into Apache's www directory
|
| 30 |
+
|
| 31 |
+
3) Point StaticData::logs_path to the correct directory, e.g. /home/cruise/logs/example/
|
| 32 |
+
Default value is 'data', you might want to just create a symlink.
|
| 33 |
+
|
| 34 |
+
Written by Ondrej Bojar, Ales Tamchyna, Barry Haddow, Rimas Blazaitis
|
mosesdecoder/cruise-control/config.ems
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
################################################
|
| 2 |
+
### CONFIGURATION FILE FOR AN SMT EXPERIMENT ###
|
| 3 |
+
################################################
|
| 4 |
+
|
| 5 |
+
[GENERAL]
|
| 6 |
+
|
| 7 |
+
### directory in which experiment is run
|
| 8 |
+
#
|
| 9 |
+
working-dir = WORKDIR/ems_workdir
|
| 10 |
+
|
| 11 |
+
# Giza and friends
|
| 12 |
+
external-bin-dir = WORKDIR/giza-pp/bin/
|
| 13 |
+
|
| 14 |
+
# specification of the language pair
|
| 15 |
+
input-extension = fr
|
| 16 |
+
output-extension = en
|
| 17 |
+
pair-extension = fr-en
|
| 18 |
+
|
| 19 |
+
### directories that contain tools and data
|
| 20 |
+
#
|
| 21 |
+
# moses
|
| 22 |
+
moses-src-dir = WORKDIR
|
| 23 |
+
#
|
| 24 |
+
# moses scripts
|
| 25 |
+
moses-script-dir = WORKDIR/scripts
|
| 26 |
+
#
|
| 27 |
+
# srilm
|
| 28 |
+
srilm-dir = SRILMDIR/bin/MACHINE_TYPE
|
| 29 |
+
#
|
| 30 |
+
# data
|
| 31 |
+
toy-data = $moses-script-dir/ems/example/data
|
| 32 |
+
|
| 33 |
+
### basic tools
|
| 34 |
+
#
|
| 35 |
+
# moses decoder
|
| 36 |
+
decoder = $moses-src-dir/bin/moses
|
| 37 |
+
|
| 38 |
+
# conversion of phrase table into binary on-disk format
|
| 39 |
+
ttable-binarizer = $moses-src-dir/bin/processPhraseTable
|
| 40 |
+
|
| 41 |
+
# conversion of rule table into binary on-disk format
|
| 42 |
+
#ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
|
| 43 |
+
|
| 44 |
+
# tokenizers - comment out if all your data is already tokenized
|
| 45 |
+
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
| 46 |
+
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
| 47 |
+
|
| 48 |
+
# truecasers - comment out if you do not use the truecaser
|
| 49 |
+
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
| 50 |
+
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
| 51 |
+
detruecaser = $moses-script-dir/recaser/detruecase.perl
|
| 52 |
+
|
| 53 |
+
### generic parallelizer for cluster and multi-core machines
|
| 54 |
+
# you may specify a script that allows the parallel execution
|
| 55 |
+
# of parallelizable steps (see meta file). you also need to specify
|
| 56 |
+
# the number of jobs (cluster) or cores (multicore)
|
| 57 |
+
#
|
| 58 |
+
#generic-parallelizer = $moses-script-dir/ems/support/generic-parallelizer.perl
|
| 59 |
+
#generic-parallelizer = $moses-script-dir/ems/support/generic-multicore-parallelizer.perl
|
| 60 |
+
|
| 61 |
+
### cluster settings (if run on a cluster machine)
|
| 62 |
+
# number of jobs to be submitted in parallel
|
| 63 |
+
#
|
| 64 |
+
#jobs = 10
|
| 65 |
+
|
| 66 |
+
# arguments to qsub when scheduling a job
|
| 67 |
+
#qsub-settings = ""
|
| 68 |
+
|
| 69 |
+
# project for privileges and usage accounting
|
| 70 |
+
#qsub-project = iccs_smt
|
| 71 |
+
|
| 72 |
+
# memory and time
|
| 73 |
+
#qsub-memory = 4
|
| 74 |
+
#qsub-hours = 48
|
| 75 |
+
|
| 76 |
+
### multi-core settings
|
| 77 |
+
# when the generic parallelizer is used, the number of cores
|
| 78 |
+
# specified here
|
| 79 |
+
cores = 8
|
| 80 |
+
|
| 81 |
+
#################################################################
|
| 82 |
+
# PARALLEL CORPUS PREPARATION:
|
| 83 |
+
# create a tokenized, sentence-aligned corpus, ready for training
|
| 84 |
+
|
| 85 |
+
[CORPUS]
|
| 86 |
+
|
| 87 |
+
### long sentences are filtered out, since they slow down GIZA++
|
| 88 |
+
# and are a less reliable source of data. set here the maximum
|
| 89 |
+
# length of a sentence
|
| 90 |
+
#
|
| 91 |
+
max-sentence-length = 80
|
| 92 |
+
|
| 93 |
+
[CORPUS:toy]
|
| 94 |
+
|
| 95 |
+
### command to run to get raw corpus files
|
| 96 |
+
#
|
| 97 |
+
# get-corpus-script =
|
| 98 |
+
|
| 99 |
+
### raw corpus files (untokenized, but sentence aligned)
|
| 100 |
+
#
|
| 101 |
+
raw-stem = $toy-data/nc-5k
|
| 102 |
+
|
| 103 |
+
### tokenized corpus files (may contain long sentences)
|
| 104 |
+
#
|
| 105 |
+
#tokenized-stem =
|
| 106 |
+
|
| 107 |
+
### if sentence filtering should be skipped,
|
| 108 |
+
# point to the clean training data
|
| 109 |
+
#
|
| 110 |
+
#clean-stem =
|
| 111 |
+
|
| 112 |
+
### if corpus preparation should be skipped,
|
| 113 |
+
# point to the prepared training data
|
| 114 |
+
#
|
| 115 |
+
#lowercased-stem =
|
| 116 |
+
|
| 117 |
+
#################################################################
|
| 118 |
+
# LANGUAGE MODEL TRAINING
|
| 119 |
+
|
| 120 |
+
[LM]
|
| 121 |
+
|
| 122 |
+
### tool to be used for language model training
|
| 123 |
+
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh)
|
| 124 |
+
#
|
| 125 |
+
lm-training = $srilm-dir/ngram-count
|
| 126 |
+
settings = "-interpolate -kndiscount -unk"
|
| 127 |
+
order = 5
|
| 128 |
+
|
| 129 |
+
### tool to be used for training randomized language model from scratch
|
| 130 |
+
# (more commonly, a SRILM is trained)
|
| 131 |
+
#
|
| 132 |
+
#rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8"
|
| 133 |
+
|
| 134 |
+
### script to use for binary table format for irstlm or kenlm
|
| 135 |
+
# (default: no binarization)
|
| 136 |
+
|
| 137 |
+
# irstlm
|
| 138 |
+
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm
|
| 139 |
+
|
| 140 |
+
# kenlm, also set type to 8
|
| 141 |
+
#lm-binarizer = $moses-src-dir/kenlm/build_binary
|
| 142 |
+
type = 8
|
| 143 |
+
|
| 144 |
+
### script to create quantized language model format (irstlm)
|
| 145 |
+
# (default: no quantization)
|
| 146 |
+
#
|
| 147 |
+
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm
|
| 148 |
+
|
| 149 |
+
### script to use for converting into randomized table format
|
| 150 |
+
# (default: no randomization)
|
| 151 |
+
#
|
| 152 |
+
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8"
|
| 153 |
+
|
| 154 |
+
### each language model to be used has its own section here
|
| 155 |
+
|
| 156 |
+
[LM:toy]
|
| 157 |
+
|
| 158 |
+
### command to run to get raw corpus files
|
| 159 |
+
#
|
| 160 |
+
#get-corpus-script = ""
|
| 161 |
+
|
| 162 |
+
type = 8
|
| 163 |
+
|
| 164 |
+
### raw corpus (untokenized)
|
| 165 |
+
#
|
| 166 |
+
raw-corpus = $toy-data/nc-5k.$output-extension
|
| 167 |
+
|
| 168 |
+
### tokenized corpus files (may contain long sentences)
|
| 169 |
+
#
|
| 170 |
+
#tokenized-corpus =
|
| 171 |
+
|
| 172 |
+
### if corpus preparation should be skipped,
|
| 173 |
+
# point to the prepared language model
|
| 174 |
+
#
|
| 175 |
+
#lm =
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
[TRAINING]
|
| 179 |
+
|
| 180 |
+
### training script to be used: either a legacy script or
|
| 181 |
+
# current moses training script (default)
|
| 182 |
+
#
|
| 183 |
+
script = $moses-script-dir/training/train-model.perl
|
| 184 |
+
|
| 185 |
+
### general options
|
| 186 |
+
#
|
| 187 |
+
#training-options = ""
|
| 188 |
+
|
| 189 |
+
### factored training: specify here which factors are used
|
| 190 |
+
# if none specified, single factor training is assumed
|
| 191 |
+
# (one translation step, surface to surface)
|
| 192 |
+
#
|
| 193 |
+
#input-factors = word lemma pos morph
|
| 194 |
+
#output-factors = word lemma pos
|
| 195 |
+
#alignment-factors = "word -> word"
|
| 196 |
+
#translation-factors = "word -> word"
|
| 197 |
+
#reordering-factors = "word -> word"
|
| 198 |
+
#generation-factors = "word -> pos"
|
| 199 |
+
#decoding-steps = "t0, g0"
|
| 200 |
+
|
| 201 |
+
### pre-computation for giza++
|
| 202 |
+
# giza++ has a more efficient data structure that needs to be
|
| 203 |
+
# initialized with snt2cooc. if run in parallel, this may reduce
|
| 204 |
+
# memory requirements. set here the number of parts
|
| 205 |
+
#
|
| 206 |
+
run-giza-in-parts = 5
|
| 207 |
+
|
| 208 |
+
### symmetrization method to obtain word alignments from giza output
|
| 209 |
+
# (commonly used: grow-diag-final-and)
|
| 210 |
+
#
|
| 211 |
+
alignment-symmetrization-method = grow-diag-final-and
|
| 212 |
+
|
| 213 |
+
### use of berkeley aligner for word alignment
|
| 214 |
+
#
|
| 215 |
+
#use-berkeley = true
|
| 216 |
+
#alignment-symmetrization-method = berkeley
|
| 217 |
+
#berkeley-train = $moses-script-dir/ems/support/berkeley-train.sh
|
| 218 |
+
#berkeley-process = $moses-script-dir/ems/support/berkeley-process.sh
|
| 219 |
+
#berkeley-jar = /your/path/to/berkeleyaligner-1.1/berkeleyaligner.jar
|
| 220 |
+
#berkeley-java-options = "-server -mx30000m -ea"
|
| 221 |
+
#berkeley-training-options = "-Main.iters 5 5 -EMWordAligner.numThreads 8"
|
| 222 |
+
#berkeley-process-options = "-EMWordAligner.numThreads 8"
|
| 223 |
+
#berkeley-posterior = 0.5
|
| 224 |
+
|
| 225 |
+
### if word alignment should be skipped,
|
| 226 |
+
# point to word alignment files
|
| 227 |
+
#
|
| 228 |
+
#word-alignment = $working-dir/model/aligned.1
|
| 229 |
+
|
| 230 |
+
### create a bilingual concordancer for the model
|
| 231 |
+
#
|
| 232 |
+
#biconcor = $moses-script-dir/ems/biconcor/biconcor
|
| 233 |
+
|
| 234 |
+
### lexicalized reordering: specify orientation type
|
| 235 |
+
# (default: only distance-based reordering model)
|
| 236 |
+
#
|
| 237 |
+
lexicalized-reordering = msd-bidirectional-fe
|
| 238 |
+
|
| 239 |
+
### hierarchical rule set
|
| 240 |
+
#
|
| 241 |
+
#hierarchical-rule-set = true
|
| 242 |
+
|
| 243 |
+
### settings for rule extraction
|
| 244 |
+
#
|
| 245 |
+
#extract-settings = ""
|
| 246 |
+
|
| 247 |
+
### unknown word labels (target syntax only)
|
| 248 |
+
# enables use of unknown word labels during decoding
|
| 249 |
+
# label file is generated during rule extraction
|
| 250 |
+
#
|
| 251 |
+
#use-unknown-word-labels = true
|
| 252 |
+
|
| 253 |
+
### if phrase extraction should be skipped,
|
| 254 |
+
# point to stem for extract files
|
| 255 |
+
#
|
| 256 |
+
# extracted-phrases =
|
| 257 |
+
|
| 258 |
+
### settings for rule scoring
|
| 259 |
+
#
|
| 260 |
+
score-settings = "--GoodTuring"
|
| 261 |
+
|
| 262 |
+
### include word alignment in phrase table
|
| 263 |
+
#
|
| 264 |
+
#include-word-alignment-in-rules = yes
|
| 265 |
+
|
| 266 |
+
### if phrase table training should be skipped,
|
| 267 |
+
# point to phrase translation table
|
| 268 |
+
#
|
| 269 |
+
# phrase-translation-table =
|
| 270 |
+
|
| 271 |
+
### if reordering table training should be skipped,
|
| 272 |
+
# point to reordering table
|
| 273 |
+
#
|
| 274 |
+
# reordering-table =
|
| 275 |
+
|
| 276 |
+
### if training should be skipped,
|
| 277 |
+
# point to a configuration file that contains
|
| 278 |
+
# pointers to all relevant model files
|
| 279 |
+
#
|
| 280 |
+
#config =
|
| 281 |
+
|
| 282 |
+
#####################################################
|
| 283 |
+
### TUNING: finding good weights for model components
|
| 284 |
+
|
| 285 |
+
[TUNING]
|
| 286 |
+
|
| 287 |
+
### instead of tuning with this setting, old weights may be recycled
|
| 288 |
+
# specify here an old configuration file with matching weights
|
| 289 |
+
#
|
| 290 |
+
weight-config = $toy-data/weight.ini
|
| 291 |
+
|
| 292 |
+
### tuning script to be used
|
| 293 |
+
#
|
| 294 |
+
tuning-script = $moses-script-dir/training/mert-moses.pl
|
| 295 |
+
tuning-settings = "-mertdir $moses-src-dir/mert"
|
| 296 |
+
|
| 297 |
+
### specify the corpus used for tuning
|
| 298 |
+
# it should contain 1000s of sentences
|
| 299 |
+
#
|
| 300 |
+
#input-sgm =
|
| 301 |
+
#raw-input =
|
| 302 |
+
#tokenized-input =
|
| 303 |
+
#factorized-input =
|
| 304 |
+
#input =
|
| 305 |
+
#
|
| 306 |
+
#reference-sgm =
|
| 307 |
+
#raw-reference =
|
| 308 |
+
#tokenized-reference =
|
| 309 |
+
#factorized-reference =
|
| 310 |
+
#reference =
|
| 311 |
+
|
| 312 |
+
### size of n-best list used (typically 100)
|
| 313 |
+
#
|
| 314 |
+
nbest = 100
|
| 315 |
+
|
| 316 |
+
### ranges for weights for random initialization
|
| 317 |
+
# if not specified, the tuning script will use generic ranges
|
| 318 |
+
# it is not clear if this matters
|
| 319 |
+
#
|
| 320 |
+
# lambda =
|
| 321 |
+
|
| 322 |
+
### additional flags for the filter script
|
| 323 |
+
#
|
| 324 |
+
filter-settings = ""
|
| 325 |
+
|
| 326 |
+
### additional flags for the decoder
|
| 327 |
+
#
|
| 328 |
+
decoder-settings = ""
|
| 329 |
+
|
| 330 |
+
### if tuning should be skipped, specify this here
|
| 331 |
+
# and also point to a configuration file that contains
|
| 332 |
+
# pointers to all relevant model files
|
| 333 |
+
#
|
| 334 |
+
#config =
|
| 335 |
+
|
| 336 |
+
#########################################################
|
| 337 |
+
## RECASER: restore case, this part only trains the model
|
| 338 |
+
|
| 339 |
+
[RECASING]
|
| 340 |
+
|
| 341 |
+
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm
|
| 342 |
+
|
| 343 |
+
### training data
|
| 344 |
+
# raw input still needs to be tokenized,
|
| 345 |
+
# alternatively, already tokenized input may be specified
|
| 346 |
+
#
|
| 347 |
+
#tokenized = [LM:europarl:tokenized-corpus]
|
| 348 |
+
|
| 349 |
+
# recase-config =
|
| 350 |
+
|
| 351 |
+
#lm-training = $srilm-dir/ngram-count
|
| 352 |
+
|
| 353 |
+
#######################################################
|
| 354 |
+
## TRUECASER: train model to truecase corpora and input
|
| 355 |
+
|
| 356 |
+
[TRUECASER]
|
| 357 |
+
|
| 358 |
+
### script to train truecaser models
|
| 359 |
+
#
|
| 360 |
+
trainer = $moses-script-dir/recaser/train-truecaser.perl
|
| 361 |
+
|
| 362 |
+
### training data
|
| 363 |
+
# data on which truecaser is trained
|
| 364 |
+
# if no training data is specified, parallel corpus is used
|
| 365 |
+
#
|
| 366 |
+
# raw-stem =
|
| 367 |
+
# tokenized-stem =
|
| 368 |
+
|
| 369 |
+
### trained model
|
| 370 |
+
#
|
| 371 |
+
# truecase-model =
|
| 372 |
+
|
| 373 |
+
######################################################################
|
| 374 |
+
## EVALUATION: translating a test set using the tuned system and score it
|
| 375 |
+
|
| 376 |
+
[EVALUATION]
|
| 377 |
+
|
| 378 |
+
### additional flags for the filter script
|
| 379 |
+
#
|
| 380 |
+
#filter-settings = ""
|
| 381 |
+
|
| 382 |
+
### additional decoder settings
|
| 383 |
+
# switches for the Moses decoder
|
| 384 |
+
#
|
| 385 |
+
decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000"
|
| 386 |
+
|
| 387 |
+
### specify size of n-best list, if produced
|
| 388 |
+
#
|
| 389 |
+
#nbest = 100
|
| 390 |
+
|
| 391 |
+
### multiple reference translations
|
| 392 |
+
#
|
| 393 |
+
#multiref = yes
|
| 394 |
+
|
| 395 |
+
### prepare system output for scoring
|
| 396 |
+
# this may include detokenization and wrapping output in sgm
|
| 397 |
+
# (needed for nist-bleu, ter, meteor)
|
| 398 |
+
#
|
| 399 |
+
detokenizer = "$moses-script-dir/tokenizer/detokenizer.perl -l $output-extension"
|
| 400 |
+
#recaser = $moses-script-dir/recaser/recase.perl
|
| 401 |
+
wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension"
|
| 402 |
+
#output-sgm =
|
| 403 |
+
|
| 404 |
+
### BLEU
|
| 405 |
+
#
|
| 406 |
+
nist-bleu = $moses-script-dir/generic/mteval-v12.pl
|
| 407 |
+
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c"
|
| 408 |
+
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl
|
| 409 |
+
#ibm-bleu =
|
| 410 |
+
|
| 411 |
+
### TER: translation error rate (BBN metric) based on edit distance
|
| 412 |
+
# not yet integrated
|
| 413 |
+
#
|
| 414 |
+
# ter =
|
| 415 |
+
|
| 416 |
+
### METEOR: gives credit to stem / WordNet synonym matches
|
| 417 |
+
# not yet integrated
|
| 418 |
+
#
|
| 419 |
+
# meteor =
|
| 420 |
+
|
| 421 |
+
### Analysis: carry out various forms of analysis on the output
|
| 422 |
+
#
|
| 423 |
+
analysis = $moses-script-dir/ems/support/analysis.perl
|
| 424 |
+
#
|
| 425 |
+
# also report on input coverage
|
| 426 |
+
analyze-coverage = yes
|
| 427 |
+
#
|
| 428 |
+
# also report on phrase mappings used
|
| 429 |
+
report-segmentation = yes
|
| 430 |
+
#
|
| 431 |
+
# report precision of translations for each input word, broken down by
|
| 432 |
+
# count of input word in corpus and model
|
| 433 |
+
#report-precision-by-coverage = yes
|
| 434 |
+
#
|
| 435 |
+
# further precision breakdown by factor
|
| 436 |
+
#precision-by-coverage-factor = pos
|
| 437 |
+
|
| 438 |
+
[EVALUATION:test]
|
| 439 |
+
|
| 440 |
+
### input data
|
| 441 |
+
#
|
| 442 |
+
input-sgm = $toy-data/test-src.$input-extension.sgm
|
| 443 |
+
# raw-input =
|
| 444 |
+
# tokenized-input =
|
| 445 |
+
# factorized-input =
|
| 446 |
+
# input =
|
| 447 |
+
|
| 448 |
+
### reference data
|
| 449 |
+
#
|
| 450 |
+
reference-sgm = $toy-data/test-ref.$output-extension.sgm
|
| 451 |
+
# raw-reference =
|
| 452 |
+
# tokenized-reference =
|
| 453 |
+
# reference =
|
| 454 |
+
|
| 455 |
+
### analysis settings
|
| 456 |
+
# may contain any of the general evaluation analysis settings
|
| 457 |
+
# specific setting: base coverage statistics on earlier run
|
| 458 |
+
#
|
| 459 |
+
#precision-by-coverage-base = $working-dir/evaluation/test.analysis.5
|
| 460 |
+
|
| 461 |
+
### wrapping frame
|
| 462 |
+
# for nist-bleu and other scoring scripts, the output needs to be wrapped
|
| 463 |
+
# in sgm markup (typically like the input sgm)
|
| 464 |
+
#
|
| 465 |
+
wrapping-frame = $input-sgm
|
| 466 |
+
|
| 467 |
+
##########################################
|
| 468 |
+
### REPORTING: summarize evaluation scores
|
| 469 |
+
|
| 470 |
+
[REPORTING]
|
| 471 |
+
|
| 472 |
+
### currently no parameters for reporting section
|
| 473 |
+
|
mosesdecoder/cruise-control/create-binary.perl
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env perl

# Builds Moses from the current checkout, packs bin/ and lib/ into
# <machine>.tgz and uploads the archive to a dated directory on $HOSTNAME.
# Every step is a shell command; the script stops at the first one that
# fails so that a broken build is never packaged or uploaded.

use strict;
use warnings;

my $HOSTNAME = "s0565741\@thor.inf.ed.ac.uk";

# Print a shell command to STDERR, run it, and die if it exits non-zero.
sub run_or_die {
  my ($cmd) = @_;
  print STDERR "Executing: $cmd \n";
  system($cmd) == 0
    or die "Command failed (status $?): $cmd\n";
}

# what machine
my $machine = `uname`;
chomp($machine);
# the archive for Darwin hosts is named "mac"
$machine = "mac" if $machine eq "Darwin";

# COMPILE
run_or_die("git checkout master && git pull");
run_or_die("make -f contrib/Makefiles/install-dependencies.gmake && ./compile.sh --without-tcmalloc");

# ZIP
run_or_die("mkdir -p mt-tools/moses && mv bin lib mt-tools/moses");
run_or_die("tar -zcvf $machine.tgz mt-tools");

# UPLOAD
my $date = `date "+%F"`;
chomp($date);

my $targetDir = "/fs/thor1/hieu/binaries/$date/";
print STDERR "Directory=$targetDir\n";

run_or_die("ssh $HOSTNAME mkdir -p $targetDir");
run_or_die("rsync -rv --delete $machine.tgz $HOSTNAME:$targetDir");
run_or_die("rm $machine.tgz");
|
mosesdecoder/cruise-control/example.config
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# the tmp directory
|
| 2 |
+
MCC_TEMPDIR="/disk4/cruise-control/cruise-control/tmp/"
|
| 3 |
+
|
| 4 |
+
# where logs are saved
|
| 5 |
+
MCC_LOGDIR="/disk4/cruise-control/cruise-control/"
|
| 6 |
+
|
| 7 |
+
# repository that will be checked out
|
| 8 |
+
MCC_GITREPO="https://github.com/moses-smt/mosesdecoder.git"
|
| 9 |
+
|
| 10 |
+
# arguments passed to Moses configure
|
| 11 |
+
MCC_CONFIGURE_ARGS=" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
| 12 |
+
|
| 13 |
+
ALTERNATIVE_CONFIGURE_ARGS=(
|
| 14 |
+
" --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
| 15 |
+
" --with-srilm=/disk4/cruise-control/srilm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
| 16 |
+
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
| 17 |
+
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
|
| 18 |
+
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --no-xmlrpc-c -j8 "
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
# list of branches to be checked
|
| 22 |
+
MCC_SCAN_BRANCHES="origin/master"
|
| 23 |
+
|
| 24 |
+
# run full training/eval pipeline using EMS?
|
| 25 |
+
MCC_RUN_EMS="yes"
|
mosesdecoder/cruise-control/shorten_info.pl
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/perl

# Copies its input to STDOUT up to, but not including, the first line that
# starts with "diff --git"; everything from that line on is discarded.

use strict;
use warnings;

while (my $line = <>) {
  if ($line =~ /^diff --git/) {
    last;
  }
  print $line;
}
|
mosesdecoder/cruise-control/test_all_new_commits.sh
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# given a config file runs tests on all untested commits of the scanned branches
|
| 3 |
+
# storing detailed logs to logs/CONFIGNAME/commit
|
| 4 |
+
# and extending the file brief.log
|
| 5 |
+
#
|
| 6 |
+
# A commit is assumed to be tested, if logs/CONFIGNAME/commit exists
|
| 7 |
+
#
|
| 8 |
+
# Ondrej Bojar, Ales Tamchyna, 2011
|
| 9 |
+
|
| 10 |
+
# Print all arguments as one message on stderr.
warn() {
  echo "$@" 1>&2
}
|
| 11 |
+
# Print all arguments as one message on stderr, then exit with status 1.
die() {
  echo "$@" 1>&2
  exit 1
}
|
| 12 |
+
set -o pipefail # safer pipes

configf="$1"
[ -e "$configf" ] || die "usage: $0 configfile"
configname=$(basename "$configf" | sed 's/\.config$//')

source "$configf"

# beautifier
# Runs in a subshell with every step chained by &&: if the clone or the cd
# fails, nothing is committed or pushed from the caller's directory, and the
# caller's working directory is never changed.
(
  git clone git@github.com:moses-smt/mosesdecoder.git /tmp/moses \
    && cd /tmp/moses \
    && ./scripts/other/beautify.py --format --skip-perltidy \
    && { git commit -am "daily automatic beautifier"; git push; }
) || warn "Automatic beautifier run failed, continuing with the tests"
rm -rf /tmp/moses

[ -z "$MCC_SCAN_BRANCHES" ] \
  && die "Bad config $configf; does not define MCC_SCAN_BRANCHES"

# use the given tempdir or make subdir tmp here
USE_TEMPDIR=$MCC_TEMPDIR
[ -d "$USE_TEMPDIR" ] || USE_TEMPDIR=./tmp

LOGDIR=$MCC_LOGDIR
[ -d "$LOGDIR" ] || LOGDIR=.

# ensure full path for logdir
LOGDIR=$(readlink -f "$LOGDIR")
[ -d "$LOGDIR" ] || die "Fatal: confusing readlink for $LOGDIR"

# this is where moses is cloned into
WORKDIR=$MCC_WORKDIR
[ -d "$WORKDIR" ] || WORKDIR=$USE_TEMPDIR/workdir

MYDIR=$(pwd)

# this is where moses is taken from
GITREPO="$MCC_GITREPO"
[ -n "$GITREPO" ] || GITREPO=/home/obo/moses-at-google-code

# location of moses regression test data archive (assumes url at the moment)
# NOTE(review): the default used to be a git:// URL; GitHub no longer serves
# the unauthenticated git protocol, so https is used instead -- confirm.
REGTEST_ARCHIVE="$MCC_REGTEST_ARCHIVE"
[ -n "$REGTEST_ARCHIVE" ] \
  || REGTEST_ARCHIVE="https://github.com/moses-smt/moses-regression-tests.git"

if [ ! -d "$WORKDIR" ]; then
  # -p: the default workdir lives under ./tmp, which may not exist yet
  mkdir -p "$WORKDIR" || die "Failed to create workdir $WORKDIR"
  warn "Cloning $GITREPO into $WORKDIR"
  git clone "$GITREPO" "$WORKDIR" \
    || die "Failed to git clone into workdir $WORKDIR"
else
  ( cd "$WORKDIR" && git fetch ) \
    || die "Failed to update our clone at $WORKDIR"
fi

# Make WORKDIR absolute: the script later changes into it and still uses
# "$WORKDIR/..." paths, which would otherwise resolve relative to itself.
WORKDIR=$(cd "$WORKDIR" && pwd) || die "Failed to resolve workdir $WORKDIR"

mkdir -p "$LOGDIR/logs/$configname" \
  || die "Failed to create dir $LOGDIR/logs/$configname"
|
| 70 |
+
|
| 71 |
+
#### How is one test performed
|
| 72 |
+
# Test one commit: check it out in the current directory (the moses clone),
# build it, run the regression tests and optionally EMS, and write a detailed
# log plus one summary line to brief.log.
#   $1  commit hash to test
# Returns 0 and touches <commit>.OK when every step passed, 1 otherwise.
# Reads LOGDIR, MYDIR, WORKDIR, configf, configname, REGTEST_ARCHIVE and the
# MCC_* / ALTERNATIVE_CONFIGURE_ARGS settings from the sourced config.
function run_single_test () {
  commit=$1
  first_char=$(echo "$commit" | grep -o '^.')
  longlog="$LOGDIR/logs/$configname/$first_char/$commit.log"
  warn "Testing commit $commit"

  # Get the version of this script
  ccversion=$(svnversion 2>/dev/null)
  [ ! -z "$ccversion" ] || ccversion=$(git show 2>&1 | head -n 1)
  [ ! -z "$ccversion" ] || ccversion="unknown"

  # Create log header with computer details:
  mkdir -p "$(dirname "$longlog")" || die "Failed to create dir for $longlog"
  {
    echo "#### Moses Cruise Control Log for commit $commit"
    date
    echo "## Cruise Control version"
    echo "$ccversion"
    echo "## Parameters"
    # the config path may be absolute or relative to the start directory
    case "$configf" in
      /*) cat "$configf" ;;
      *)  cat "$MYDIR/$configf" ;;
    esac
    echo "## Envinronment"
    uname -a
    env
  } > "$longlog"

  git checkout --force "$commit" 2>/dev/null || die "Failed to checkout commit $commit"

  err=""

  # download data for regression tests if necessary
  # (no cd into regression-testing: a failed cd followed by "cd .." would
  # leave the function running one level above the clone)
  regtest_dir="$PWD/regression-testing/moses-reg-test-data"
  if [ -e "$regtest_dir" ]; then
    (cd "$regtest_dir" && git pull) &> /dev/null ||
      die "Failed to update regression testing data"
  else
    git clone "$REGTEST_ARCHIVE" "$regtest_dir" &> /dev/null ||
      die "Failed to clone regression testing data"
  fi

  # test build with different configurations
  echo "## test build with different configurations" >> "$longlog"
  for configArgs in "${ALTERNATIVE_CONFIGURE_ARGS[@]}"
  do
    echo "building with args: $configArgs" >> "$longlog"
    # $configArgs is deliberately unquoted: it holds several bjam options
    ./bjam clean -a $configArgs >> "$longlog" 2>&1 || warn "bjam clean failed, suspicious"
  done

  echo "## ./bjam clean" >> "$longlog"
  ./bjam clean -a $MCC_CONFIGURE_ARGS --with-regtest="$regtest_dir" >> "$longlog" 2>&1 || warn "bjam clean failed, suspicious"

  echo "## ./bjam $MCC_CONFIGURE_ARGS" >> "$longlog"
  if [ -z "$err" ]; then
    ./bjam $MCC_CONFIGURE_ARGS >> "$longlog" 2>&1 || err="bjam"
  fi

  echo "## regression tests" >> "$longlog"
  if [ -z "$err" ]; then
    ./bjam $MCC_CONFIGURE_ARGS --with-regtest="$regtest_dir" >> "$longlog" 2>&1 || err="regression tests"
  fi

  if [ -z "$err" ] && [ "$MCC_RUN_EMS" = "yes" ]; then
    echo "## EMS" >> "$longlog"
    if [ ! -f "giza-pp.ok" ]; then # fetch & compile Giza++
      # NOTE(review): this is a Google Code URL; confirm it is still reachable
      svn checkout http://giza-pp.googlecode.com/svn/trunk/ giza-pp \
        || die "Failed to fetch Giza++"
      (
        cd giza-pp && make && mkdir -p bin \
          && ln -sf ../GIZA++-v2/GIZA++ ../GIZA++-v2/snt2cooc.out ../mkcls-v2/mkcls bin/
      ) || die "Failed to compile Giza++"
      touch giza-pp.ok
    fi
    # The original line carried a stray double quote after the arguments,
    # which unbalanced the quoting of everything that follows.
    # NOTE(review): a flag pointing bjam at giza-pp/bin may have been lost
    # here -- confirm against the upstream script.
    ./bjam $MCC_CONFIGURE_ARGS || err="bjam"
    srilm_dir=$(echo $MCC_CONFIGURE_ARGS | sed -r 's/.*--with-srilm=([^ ]+) .*/\1/')
    mach_type=$("$srilm_dir/sbin/machine-type")
    mkdir -p "$WORKDIR/ems_workdir"
    rm -rf "$WORKDIR/ems_workdir/"* # clean any previous experiments
    # instantiate the EMS config template for this machine
    sed \
      -e "s#WORKDIR#$WORKDIR#" \
      -e "s#SRILMDIR#$srilm_dir#" \
      -e "s#MACHINE_TYPE#$mach_type#" \
      "$MYDIR/config.ems" > ./config.ems
    scripts/ems/experiment.perl \
      -no-graph -exec -config "$(pwd)/config.ems" &>> "$longlog" \
      || die "Running EMS failed"
    [ -f "$WORKDIR/ems_workdir/steps/1/REPORTING_report.1.DONE" ] || err="ems"
  fi

  echo "## Finished" >> "$longlog"
  date >> "$longlog"

  if [ -z "$err" ]; then
    status="OK"
  else
    git reset --hard HEAD
    status="FAIL:$err"
  fi
  echo "## Status: $status" >> "$longlog"

  nicedate=$(date +"%Y%m%d-%H%M%S")
  # One summary line per tested commit, fields separated by tabs.
  # NOTE(review): the original line had the five fields with no separator
  # at all (tabs presumably lost); confirm against the reader of brief.log.
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "$commit" "$status" "$configname" "$ccversion" "$nicedate" \
    >> "$LOGDIR/brief.log"

  if [ -z "$err" ]; then
    touch "$LOGDIR/logs/$configname/$first_char/$commit.OK"
  else
    return 1;
  fi
}
|
| 189 |
+
|
| 190 |
+
cd $WORKDIR || die "Failed to chdir to $WORKDIR"

# update the revision lists for all watched branches
# (one <branch>.revlist file per branch, named after the part behind the last "/")
for branch in $MCC_SCAN_BRANCHES; do
  revlist="$LOGDIR/logs/$configname/${branch##*/}.revlist"
  git rev-list $branch > "$revlist"
done

# create info files for new commits
# (walks from the newest commit and stops at the first one that already has one)
for rev in $(git rev-list $MCC_SCAN_BRANCHES); do
  first_char=${rev:0:1}
  infodir="$LOGDIR/logs/$configname/$first_char"
  mkdir -p "$infodir"
  if [ -f "$infodir/$rev.info" ]; then
    break
  fi
  git show $rev | $MYDIR/shorten_info.pl > "$infodir/$rev.info"
done

#### Main loop over all commits
# Per branch, newest first: stop at the first commit that already has a log
# or whose test passes; keep going back while tests fail.
for branch in $MCC_SCAN_BRANCHES; do
  warn "On branch $branch"
  git rev-list $branch \
  | while read rev; do
    first_char=${rev:0:1}
    test_done="$LOGDIR/logs/$configname/$first_char/$rev.log"
    if [ -e "$test_done" ]; then
      warn "Reached a previously tested commit ($rev), stopping"
      break
    fi
    if run_single_test $rev; then
      warn "Commit $rev test ok, stopping"
      break
    fi
    warn "Commit $rev test failed, continuing"
  done
done
|
mosesdecoder/cruise-control/web/html_templates.php
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?php
|
| 2 |
+
|
| 3 |
+
/**
 * Echo the opening of the page: <html>, a <head> with a UTF-8 content-type
 * meta tag and the given title, and the opening <body> tag.
 *
 * @param string $title text placed inside <title>; written as given
 */
function show_header($title)
{
    $lines = array(
        "",
        "<html>",
        "<head>",
        "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html;charset=utf-8\">",
        "<title>$title</title>",
        "</head><body>",
    );
    echo implode("\n", $lines);
}
|
| 12 |
+
|
| 13 |
+
/**
 * Echo a heading element <hN>...</hN> on a new line.
 *
 * @param string $text heading content; written as given, so it may hold markup
 * @param int    $size heading level N (defaults to 1)
 */
function show_heading($text, $size = 1)
{
    $tag = "h$size";
    echo "\n<$tag>$text</$tag>";
}
|
| 18 |
+
|
| 19 |
+
/**
 * Echo the end of the page: the closing </body> and </html> tags.
 */
function show_footer()
{
    // the document must be closed with </html>, not a second opening <html>
    echo "
</body>
</html>";
}
|
| 25 |
+
|
| 26 |
+
/**
 * Echo the closing </table> tag on a new line.
 */
function end_table()
{
    echo "\n", "</table>";
}
|
| 31 |
+
|
| 32 |
+
/**
 * Echo one table row (<tr>) with one <td> per element of $data.
 *
 * Cell contents are written as given, so they may contain markup.
 *
 * The function may also be called with the cells as the only argument,
 * array_to_table_row($cells); the row is then shaded as if $odd were true.
 *
 * @param bool|array $odd  true to give the row the shaded background;
 *                         or the cell array in the one-argument form
 * @param array|null $data cell contents
 */
function array_to_table_row($odd = true, $data = null)
{
    // One-argument form: the cells arrived in the first parameter.
    if ($data === null && is_array($odd)) {
        $data = $odd;
        $odd = true;
    }

    $bgcolor = $odd ? " bgcolor=\"#ccccdd\"" : "";
    $cells = "";
    foreach ((array) $data as $item) {
        $cells .= "\n<td style=\"padding-left:8px; padding-right:8px\">$item</td>";
    }
    echo "\n<tr$bgcolor>$cells\n</tr>";
}
|
| 44 |
+
|
| 45 |
+
/**
 * Echo the opening <table> tag (column rules, vertical side frames) on a new line.
 */
function start_table()
{
    echo "\n<table rules=\"cols\" frame=\"vsides\">";
}
|
| 50 |
+
|
| 51 |
+
/**
 * Echo an opening <form> tag on a new line.
 *
 * @param string $action value of the action attribute; written as given
 * @param string $method value of the method attribute (defaults to "get")
 */
function start_form($action, $method = "get")
{
    $attributes = "action=\"$action\" method=\"$method\"";
    echo "\n<form $attributes>";
}
|
| 56 |
+
|
| 57 |
+
/**
 * Echo the closing </form> tag on a new line.
 */
function end_form()
{
    echo "\n", "</form>";
}
|
| 62 |
+
|
| 63 |
+
/**
 * Echo a <select> element with one <option> per item.
 *
 * Each item is used as both the option value and its visible text. The
 * option equal to $selected (compared as strings) is marked as selected.
 * All values are HTML-escaped before being written.
 *
 * @param array  $items        option values
 * @param string $name         name attribute of the <select>
 * @param string $selected     item to pre-select, if any
 * @param string $onchange_hdl script for the onchange attribute; the
 *                             attribute is omitted when this is empty
 */
function show_select_box($items, $name, $selected = "", $onchange_hdl = "")
{
    $onchange = $onchange_hdl
        ? " onchange=\"" . htmlspecialchars($onchange_hdl, ENT_QUOTES, 'UTF-8') . "\""
        : "";
    echo "
<select name=\"" . htmlspecialchars($name, ENT_QUOTES, 'UTF-8') . "\"$onchange>";
    foreach ($items as $item) {
        // String comparison: with == numeric-looking names such as "1" and
        // "1.0" compare equal and would both be marked as selected.
        $item_selected = ((string) $selected === (string) $item) ? " selected=\"yes\"" : "";
        $safe_item = htmlspecialchars((string) $item, ENT_QUOTES, 'UTF-8');
        echo "
<option value=\"$safe_item\"$item_selected>$safe_item</option>";
    }
    echo "
</select>";
}
|
| 76 |
+
|
| 77 |
+
/**
 * Build an <a> element.
 *
 * @param string $label      link content; inserted as given, so it may hold markup
 * @param string $url        link target; HTML-escaped before it is placed in
 *                           the href attribute
 * @param bool   $new_window true to add target="_blank"
 * @return string the anchor markup
 */
function get_href($label, $url, $new_window = false)
{
    $target = $new_window ? " target=\"_blank\"" : "";
    // The URL may be derived from the request URI, so a quote in it must not
    // be able to end the attribute.
    $href = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
    return "<a href=\"$href\"$target>$label</a>";
}
|
| 82 |
+
|
| 83 |
+
/**
 * Echo a message as a bold red paragraph.
 *
 * @param string $msg message; written as given, without escaping
 */
function warn($msg)
{
    $open = "<p><font color=\"red\"><b>";
    $close = "</b></font>";
    echo $open . $msg . $close;
}
|
| 87 |
+
|
| 88 |
+
/**
 * Return the request URI of the current request as reported by the server.
 *
 * @return string value of $_SERVER["REQUEST_URI"]
 */
function get_current_url()
{
    $request_uri = $_SERVER["REQUEST_URI"];
    return $request_uri;
}
|
| 92 |
+
|
| 93 |
+
/**
 * Return $url with the query parameter $var set to $value.
 *
 * Any existing occurrence of the parameter is first removed with cut_var();
 * the new "var=value" pair is then appended with the separator the URL needs.
 * The value is appended as given (not URL-encoded).
 *
 * @param string $url   URL to modify
 * @param string $var   parameter name
 * @param mixed  $value parameter value
 * @return string the modified URL
 */
function set_var($url, $var, $value)
{
    $url = (string) cut_var($url, $var);

    // substr() instead of $url[strlen($url) - 1]: an empty URL has no last
    // character and indexing it is an invalid string offset.
    if (substr($url, -1) === "?") {
        $separator = "";
    } elseif (strpos($url, "?") !== false) {
        $separator = "&";
    } else {
        $separator = "?";
    }
    return $url . $separator . "$var=$value";
}
|
| 105 |
+
|
| 106 |
+
/**
 * Return $url with every "var=value" pair for the parameter $var removed
 * from its query string.
 *
 * Only whole parameter names are matched (cutting "start" leaves "restart"
 * alone), empty values are handled, and removing the first parameter does not
 * leave a dangling "?&".
 *
 * @param string $url URL to modify
 * @param string $var parameter name
 * @return string the URL without that parameter
 */
function cut_var($url, $var)
{
    $name = preg_quote($var, '/');
    // occurrences that follow another parameter: drop "&var=value"
    $url = preg_replace('/&' . $name . '=[^&#]*/', '', $url);
    // occurrence right after "?": keep the "?", drop the pair and the "&"
    // that joined it to the next parameter
    $url = preg_replace('/\?' . $name . '=[^&#]*&?/', '?', $url);
    return $url;
}
|
| 111 |
+
|
| 112 |
+
?>
|
mosesdecoder/cruise-control/web/index.php
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?php
|
| 2 |
+
|
| 3 |
+
include("html_templates.php");
|
| 4 |
+
include("log_wrapper.php");
|
| 5 |
+
|
| 6 |
+
// number of commits listed per page
const SHOW_ITEMS = 50;
// prefix of the commit links; the commit name is appended to it
const GITHUB_LINK = "https://github.com/moses-smt/mosesdecoder/commit/";

show_header("Moses Cruise Control");
echo "\n<center>\n";

show_heading("Moses Cruise Control");
echo "\n</center>\n";

// show current status of 'master' branch
$master_branch = new Branch("master");
$last_commit = $master_branch->get_next_commit();
$last_commit->read_log();
show_heading("Current status of master: " . colorize_status($last_commit->get_status()), 3);

// only accept a plain string (a request may also send branch[]=...)
$branch_name = (! empty($_GET["branch"]) && is_string($_GET["branch"])) ? $_GET["branch"] : "master";

// check that user wants to see a valid branch
$all_branches = get_all_branch_names();
if (! in_array($branch_name, $all_branches, true)) {
    // the name comes straight from the request, so escape it before echoing
    $shown_name = htmlspecialchars($branch_name, ENT_QUOTES, 'UTF-8');
    warn("Branch '$shown_name' not found (only branches with some tests done can be viewed)");
    $branch_name = "master";
}

// branch select box
start_form("", "get");
echo "<p>Showing log of branch: ";
show_select_box($all_branches, "branch", $branch_name, "submit()");
end_form();

$branch = new Branch($branch_name);
// the offset is used in arithmetic below: accept only a non-negative integer
$start_with = (isset($_GET["start"]) && is_numeric($_GET["start"])) ? max(0, (int) $_GET["start"]) : 0;
$branch->set_line($start_with);

show_navigation($start_with);

// table of commits
start_table();
array_to_table_row(true, array("<b>Commit Link</b>", "<b>Status</b>", "<b>Full Log</b>",
    "<b>Timestamp</b>", "<b>Author</b>", "<b>Commit Message</b>" ));
for ($i = 0; $i < SHOW_ITEMS; $i++) {
    $last_commit = $branch->get_next_commit();

    // an empty commit name marks the end of the branch's log
    if ( $last_commit->get_name() == "" ) {
        array_to_table_row(true, array("=== End of log ==="));
        break;
    }
    $last_commit->read_log();
    $last_commit->read_info();

    // NOTE(review): timestamp, author and message are written as returned by
    // the commit object; if they are raw text they should be HTML-escaped
    // here -- confirm against log_wrapper.php.
    array_to_table_row(($i % 2 == 1),
        array( get_href(substr($last_commit->get_name(), 0, 10) . "...", GITHUB_LINK . $last_commit->get_name(), true),
            colorize_status($last_commit->get_status()),
            $last_commit->was_tested() ? get_href("Log", $last_commit->get_log_file(), true) : "N/A",
            $last_commit->get_timestamp(),
            $last_commit->get_author(),
            substr($last_commit->get_message(), 0, 30) . (strlen($last_commit->get_message()) > 30 ? "..." : "")));
}

end_table();

show_navigation($start_with);
show_footer();
|
| 68 |
+
|
| 69 |
+
// HTML ends here
|
| 70 |
+
|
| 71 |
+
/**
 * Wrap a status string in bold, coloured markup.
 *
 * The colour is chosen by the first letter of the status, case-insensitively:
 * "o" gives green, "f" gives red, anything else gives #FFDD00.
 *
 * @param string $status status text
 * @return string the status wrapped in <font><b>...</b></font>
 */
function colorize_status($status)
{
    $palette = array("o" => "green", "f" => "red");
    $initial = substr(strtolower($status), 0, 1);
    $color = isset($palette[$initial]) ? $palette[$initial] : "#FFDD00";
    return "<font color=\"$color\"><b>$status</b></font>";
}
|
| 85 |
+
|
| 86 |
+
/**
 * Echo the "Previous" / "Next" paging links, wrapped in a form.
 *
 * "Previous" is a link only when $start_with is greater than 0 and then
 * points SHOW_ITEMS entries back (never below 0); "Next" always points
 * SHOW_ITEMS entries ahead. Both links are the current URL with its
 * "start" parameter replaced.
 *
 * @param int $start_with offset of the first entry shown on this page
 */
function show_navigation($start_with)
{
    $here = get_current_url();

    start_form("", "get");
    if ($start_with > 0) {
        $previous_start = max(0, $start_with - SHOW_ITEMS);
        echo get_href("<p>Previous", set_var($here, "start", $previous_start));
    } else {
        echo "Previous";
    }
    echo " ";

    $next_start = $start_with + SHOW_ITEMS;
    echo get_href("Next", set_var($here, "start", $next_start));
    end_form();
}
|
| 100 |
+
|
| 101 |
+
?>
|