sleepyhead111 commited on
Commit
7cd8a0d
·
verified ·
1 Parent(s): 12aef23

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. FacebookAI/xlm-roberta-large/onnx/tokenizer.json +3 -0
  3. data/de2en/0.6/trainable_data/valid.de-en.de.idx +0 -0
  4. data/de2en/0.6/trainable_data/valid.de-en.en.idx +0 -0
  5. data/test/raw/de2en/test.de2en.flores.de +0 -0
  6. data/test/raw/de2en/test.de2en.flores.en +0 -0
  7. data/test/raw/de2en/test.de2en.wmt22.de +0 -0
  8. data/test/raw/de2en/test.de2en.wmt22.en +0 -0
  9. data/test/raw/de2en/test.de2en.wmt23.de +0 -0
  10. data/test/raw/de2en/test.de2en.wmt23.en +0 -0
  11. data/test/raw/en2de/test.en2de.flores.de +0 -0
  12. data/test/raw/en2de/test.en2de.flores.en +0 -0
  13. data/test/raw/en2de/test.en2de.wmt22.de +0 -0
  14. data/test/raw/en2de/test.en2de.wmt22.en +0 -0
  15. data/test/raw/en2de/test.en2de.wmt23.de +0 -0
  16. data/test/raw/en2de/test.en2de.wmt23.en +0 -0
  17. data/test/raw/en2zh/test.en2zh.flores.en +0 -0
  18. data/test/raw/en2zh/test.en2zh.flores.zh +0 -0
  19. data/test/raw/en2zh/test.en2zh.wmt22.en +0 -0
  20. data/test/raw/en2zh/test.en2zh.wmt22.zh +0 -0
  21. data/test/raw/en2zh/test.en2zh.wmt23.en +0 -0
  22. data/test/raw/en2zh/test.en2zh.wmt23.zh +0 -0
  23. data/test/raw/zh2en/test.zh2en.flores.en +0 -0
  24. data/test/raw/zh2en/test.zh2en.flores.zh +0 -0
  25. data/test/raw/zh2en/test.zh2en.wmt22.en +0 -0
  26. data/test/raw/zh2en/test.zh2en.wmt22.zh +0 -0
  27. data/test/raw/zh2en/test.zh2en.wmt23.en +0 -0
  28. data/test/raw/zh2en/test.zh2en.wmt23.zh +0 -0
  29. data/test/trainable_data/de2en/dict.de.txt +0 -0
  30. data/test/trainable_data/de2en/dict.en.txt +0 -0
  31. data/test/trainable_data/de2en/test.de-en.de.idx +0 -0
  32. data/test/trainable_data/de2en/test.de-en.en.idx +0 -0
  33. data/test/trainable_data/en2de/dict.de.txt +0 -0
  34. data/test/trainable_data/en2de/dict.en.txt +0 -0
  35. data/test/trainable_data/en2de/preprocess.log +12 -0
  36. data/test/trainable_data/en2de/test.en-de.de.idx +0 -0
  37. data/test/trainable_data/en2de/test1.en-de.de.idx +0 -0
  38. data/test/trainable_data/en2de/test1.en-de.en.idx +0 -0
  39. data/test/trainable_data/zh2en/preprocess2.log +6 -0
  40. data/test/trainable_data/zh2en/test.zh-en.zh.idx +0 -0
  41. data/test/trainable_data/zh2en/test1.zh-en.zh.idx +0 -0
  42. mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa +3 -0
  43. mosesdecoder/cruise-control/README +34 -0
  44. mosesdecoder/cruise-control/config.ems +473 -0
  45. mosesdecoder/cruise-control/create-binary.perl +55 -0
  46. mosesdecoder/cruise-control/example.config +25 -0
  47. mosesdecoder/cruise-control/shorten_info.pl +9 -0
  48. mosesdecoder/cruise-control/test_all_new_commits.sh +220 -0
  49. mosesdecoder/cruise-control/web/html_templates.php +112 -0
  50. mosesdecoder/cruise-control/web/index.php +101 -0
.gitattributes CHANGED
@@ -39,3 +39,5 @@ fairseq-0.10.2/fairseq/data/token_block_utils_fast.cpython-310-x86_64-linux-gnu.
39
  fairseq-0.10.2/docs/fairseq.gif filter=lfs diff=lfs merge=lfs -text
40
  mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
41
  mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
 
 
 
39
  fairseq-0.10.2/docs/fairseq.gif filter=lfs diff=lfs merge=lfs -text
40
  mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
41
  mosesdecoder/contrib/iSenWeb/Introduction/iSenWeb[[:space:]]A[[:space:]]Web-based[[:space:]]Machine[[:space:]]Translation[[:space:]]System[[:space:]]to[[:space:]]Translate[[:space:]]Sentences.docx filter=lfs diff=lfs merge=lfs -text
42
+ FacebookAI/xlm-roberta-large/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
FacebookAI/xlm-roberta-large/onnx/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c24cdc13d4c9952d63718d6c9fa4c287974249e16b7ade6d5a85e7bbb75626
3
+ size 17082660
data/de2en/0.6/trainable_data/valid.de-en.de.idx ADDED
Binary file (24 kB). View file
 
data/de2en/0.6/trainable_data/valid.de-en.en.idx ADDED
Binary file (24 kB). View file
 
data/test/raw/de2en/test.de2en.flores.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/de2en/test.de2en.flores.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/de2en/test.de2en.wmt22.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/de2en/test.de2en.wmt22.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/de2en/test.de2en.wmt23.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/de2en/test.de2en.wmt23.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.flores.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.flores.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.wmt22.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.wmt22.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.wmt23.de ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2de/test.en2de.wmt23.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.flores.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.flores.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.wmt22.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.wmt22.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.wmt23.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/en2zh/test.en2zh.wmt23.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.flores.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.flores.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.wmt22.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.wmt22.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.wmt23.en ADDED
The diff for this file is too large to render. See raw diff
 
data/test/raw/zh2en/test.zh2en.wmt23.zh ADDED
The diff for this file is too large to render. See raw diff
 
data/test/trainable_data/de2en/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/test/trainable_data/de2en/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/test/trainable_data/de2en/test.de-en.de.idx ADDED
Binary file (12.2 kB). View file
 
data/test/trainable_data/de2en/test.de-en.en.idx ADDED
Binary file (12.2 kB). View file
 
data/test/trainable_data/en2de/dict.de.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/test/trainable_data/en2de/dict.en.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/test/trainable_data/en2de/preprocess.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [en] Dictionary: 47776 types
3
+ [en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23.en: 557 sents, 47662 tokens, 0.0% replaced by <unk>
4
+ [de] Dictionary: 47776 types
5
+ [de] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt23.de: 557 sents, 51978 tokens, 0.0% replaced by <unk>
6
+ Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de
7
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='en', target_lang='de', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.de.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/de-en/wmt23/trainable_data/dict.en.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
8
+ [en] Dictionary: 47776 types
9
+ [en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22.en: 2037 sents, 46796 tokens, 0.00641% replaced by <unk>
10
+ [de] Dictionary: 47776 types
11
+ [de] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/en2de/bpe.test.en2de.wmt22.de: 2037 sents, 51454 tokens, 0.00389% replaced by <unk>
12
+ Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/en2de
data/test/trainable_data/en2de/test.en-de.de.idx ADDED
Binary file (12.2 kB). View file
 
data/test/trainable_data/en2de/test1.en-de.de.idx ADDED
Binary file (24.5 kB). View file
 
data/test/trainable_data/en2de/test1.en-de.en.idx ADDED
Binary file (24.5 kB). View file
 
data/test/trainable_data/zh2en/preprocess2.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Namespace(no_progress_bar=False, log_interval=100, log_format=None, tensorboard_logdir=None, seed=42, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, min_loss_scale=0.0001, threshold_loss_scale=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, checkpoint_suffix='', checkpoint_shard_count=1, quantization_config_path=None, profile=False, criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang='zh', target_lang='en', trainpref=None, validpref=None, testpref='/mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23', align_suffix=None, destdir='/mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/zh2en2', thresholdtgt=0, thresholdsrc=0, tgtdict='/mnt/congmh/luoyf/xzq-fairseq/data/en-zh/wmt23/trainable_data/dict.en.txt', srcdict='/mnt/congmh/luoyf/xzq-fairseq/data/en-zh/wmt23/trainable_data/dict.zh.txt', nwordstgt=-1, nwordssrc=-1, alignfile=None, dataset_impl='mmap', joined_dictionary=False, only_source=False, padding_factor=8, workers=32)
2
+ [zh] Dictionary: 60432 types
3
+ [zh] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23.zh: 1976 sents, 52111 tokens, 0.00192% replaced by <unk>
4
+ [en] Dictionary: 46040 types
5
+ [en] /mnt/congmh/luoyf/xzq-fairseq/data/test/tokenized/zh2en/bpe.test.zh2en.wmt23.en: 1976 sents, 61608 tokens, 0.00162% replaced by <unk>
6
+ Wrote preprocessed data to /mnt/congmh/luoyf/xzq-fairseq/data/test/trainable_data/zh2en2
data/test/trainable_data/zh2en/test.zh-en.zh.idx ADDED
Binary file (12.2 kB). View file
 
data/test/trainable_data/zh2en/test1.zh-en.zh.idx ADDED
Binary file (22.5 kB). View file
 
mosesdecoder/contrib/promix/test_data/esen.nc.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f35f2fa1260b517cba96ce9a3df1890286d644ab99d975d85f5b06eae6a4d5
3
+ size 720173
mosesdecoder/cruise-control/README ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ A simple regular testing of Moses codebase, aka cruise control
2
+ --------------------------------------------------------------
3
+
4
+ Features:
5
+
6
+ - Checkout latest commits
7
+ - Test configure, make, make scripts
8
+ - Run regression tests
9
+ - Run a sample EMS pipeline
10
+ - Report results into logfiles
11
+ - A simple web interface in PHP
12
+
13
+ How to run cruise control:
14
+
15
+ 1) Download and compile SRILM
16
+
17
+ 2) Download, compile and install IRSTLM
18
+
19
+ 3) Edit the configuration file example.config. You can create as many
20
+ configuration files as you like.
21
+
22
+ 4) Execute ./test_all_new_commits.sh yourfile.config
23
+
24
+
25
+ How to set up the web interface:
26
+
27
+ 1) Install Apache and PHP
28
+
29
+ 2) Copy files from cruise-control/web into Apache's www directory
30
+
31
+ 3) Point StaticData::logs_path to correct directory, e.g. /home/cruise/logs/example/
32
+ Default value is 'data', you might want to just create a symlink.
33
+
34
+ Written by Ondrej Bojar, Ales Tamchyna, Barry Haddow, Rimas Blazaitis
mosesdecoder/cruise-control/config.ems ADDED
@@ -0,0 +1,473 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ################################################
2
+ ### CONFIGURATION FILE FOR AN SMT EXPERIMENT ###
3
+ ################################################
4
+
5
+ [GENERAL]
6
+
7
+ ### directory in which experiment is run
8
+ #
9
+ working-dir = WORKDIR/ems_workdir
10
+
11
+ # Giza and friends
12
+ external-bin-dir = WORKDIR/giza-pp/bin/
13
+
14
+ # specification of the language pair
15
+ input-extension = fr
16
+ output-extension = en
17
+ pair-extension = fr-en
18
+
19
+ ### directories that contain tools and data
20
+ #
21
+ # moses
22
+ moses-src-dir = WORKDIR
23
+ #
24
+ # moses scripts
25
+ moses-script-dir = WORKDIR/scripts
26
+ #
27
+ # srilm
28
+ srilm-dir = SRILMDIR/bin/MACHINE_TYPE
29
+ #
30
+ # data
31
+ toy-data = $moses-script-dir/ems/example/data
32
+
33
+ ### basic tools
34
+ #
35
+ # moses decoder
36
+ decoder = $moses-src-dir/bin/moses
37
+
38
+ # conversion of phrase table into binary on-disk format
39
+ ttable-binarizer = $moses-src-dir/bin/processPhraseTable
40
+
41
+ # conversion of rule table into binary on-disk format
42
+ #ttable-binarizer = "$moses-src-dir/CreateOnDisk/src/CreateOnDiskPt 1 1 5 100 2"
43
+
44
+ # tokenizers - comment out if all your data is already tokenized
45
+ input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
46
+ output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
47
+
48
+ # truecasers - comment out if you do not use the truecaser
49
+ input-truecaser = $moses-script-dir/recaser/truecase.perl
50
+ output-truecaser = $moses-script-dir/recaser/truecase.perl
51
+ detruecaser = $moses-script-dir/recaser/detruecase.perl
52
+
53
+ ### generic parallelizer for cluster and multi-core machines
54
+ # you may specify a script that allows the parallel execution
55
+ # parallizable steps (see meta file). you also need specify
56
+ # the number of jobs (cluster) or cores (multicore)
57
+ #
58
+ #generic-parallelizer = $moses-script-dir/ems/support/generic-parallelizer.perl
59
+ #generic-parallelizer = $moses-script-dir/ems/support/generic-multicore-parallelizer.perl
60
+
61
+ ### cluster settings (if run on a cluster machine)
62
+ # number of jobs to be submitted in parallel
63
+ #
64
+ #jobs = 10
65
+
66
+ # arguments to qsub when scheduling a job
67
+ #qsub-settings = ""
68
+
69
+ # project for priviledges and usage accounting
70
+ #qsub-project = iccs_smt
71
+
72
+ # memory and time
73
+ #qsub-memory = 4
74
+ #qsub-hours = 48
75
+
76
+ ### multi-core settings
77
+ # when the generic parallelizer is used, the number of cores
78
+ # specified here
79
+ cores = 8
80
+
81
+ #################################################################
82
+ # PARALLEL CORPUS PREPARATION:
83
+ # create a tokenized, sentence-aligned corpus, ready for training
84
+
85
+ [CORPUS]
86
+
87
+ ### long sentences are filtered out, since they slow down GIZA++
88
+ # and are a less reliable source of data. set here the maximum
89
+ # length of a sentence
90
+ #
91
+ max-sentence-length = 80
92
+
93
+ [CORPUS:toy]
94
+
95
+ ### command to run to get raw corpus files
96
+ #
97
+ # get-corpus-script =
98
+
99
+ ### raw corpus files (untokenized, but sentence aligned)
100
+ #
101
+ raw-stem = $toy-data/nc-5k
102
+
103
+ ### tokenized corpus files (may contain long sentences)
104
+ #
105
+ #tokenized-stem =
106
+
107
+ ### if sentence filtering should be skipped,
108
+ # point to the clean training data
109
+ #
110
+ #clean-stem =
111
+
112
+ ### if corpus preparation should be skipped,
113
+ # point to the prepared training data
114
+ #
115
+ #lowercased-stem =
116
+
117
+ #################################################################
118
+ # LANGUAGE MODEL TRAINING
119
+
120
+ [LM]
121
+
122
+ ### tool to be used for language model training
123
+ # for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh)
124
+ #
125
+ lm-training = $srilm-dir/ngram-count
126
+ settings = "-interpolate -kndiscount -unk"
127
+ order = 5
128
+
129
+ ### tool to be used for training randomized language model from scratch
130
+ # (more commonly, a SRILM is trained)
131
+ #
132
+ #rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8"
133
+
134
+ ### script to use for binary table format for irstlm or kenlm
135
+ # (default: no binarization)
136
+
137
+ # irstlm
138
+ #lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm
139
+
140
+ # kenlm, also set type to 8
141
+ #lm-binarizer = $moses-src-dir/kenlm/build_binary
142
+ type = 8
143
+
144
+ ### script to create quantized language model format (irstlm)
145
+ # (default: no quantization)
146
+ #
147
+ #lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm
148
+
149
+ ### script to use for converting into randomized table format
150
+ # (default: no randomization)
151
+ #
152
+ #lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8"
153
+
154
+ ### each language model to be used has its own section here
155
+
156
+ [LM:toy]
157
+
158
+ ### command to run to get raw corpus files
159
+ #
160
+ #get-corpus-script = ""
161
+
162
+ type = 8
163
+
164
+ ### raw corpus (untokenized)
165
+ #
166
+ raw-corpus = $toy-data/nc-5k.$output-extension
167
+
168
+ ### tokenized corpus files (may contain long sentences)
169
+ #
170
+ #tokenized-corpus =
171
+
172
+ ### if corpus preparation should be skipped,
173
+ # point to the prepared language model
174
+ #
175
+ #lm =
176
+
177
+
178
+ [TRAINING]
179
+
180
+ ### training script to be used: either a legacy script or
181
+ # current moses training script (default)
182
+ #
183
+ script = $moses-script-dir/training/train-model.perl
184
+
185
+ ### general options
186
+ #
187
+ #training-options = ""
188
+
189
+ ### factored training: specify here which factors used
190
+ # if none specified, single factor training is assumed
191
+ # (one translation step, surface to surface)
192
+ #
193
+ #input-factors = word lemma pos morph
194
+ #output-factors = word lemma pos
195
+ #alignment-factors = "word -> word"
196
+ #translation-factors = "word -> word"
197
+ #reordering-factors = "word -> word"
198
+ #generation-factors = "word -> pos"
199
+ #decoding-steps = "t0, g0"
200
+
201
+ ### pre-computation for giza++
202
+ # giza++ has a more efficient data structure that needs to be
203
+ # initialized with snt2cooc. if run in parallel, this may reduces
204
+ # memory requirements. set here the number of parts
205
+ #
206
+ run-giza-in-parts = 5
207
+
208
+ ### symmetrization method to obtain word alignments from giza output
209
+ # (commonly used: grow-diag-final-and)
210
+ #
211
+ alignment-symmetrization-method = grow-diag-final-and
212
+
213
+ ### use of berkeley aligner for word alignment
214
+ #
215
+ #use-berkeley = true
216
+ #alignment-symmetrization-method = berkeley
217
+ #berkeley-train = $moses-script-dir/ems/support/berkeley-train.sh
218
+ #berkeley-process = $moses-script-dir/ems/support/berkeley-process.sh
219
+ #berkeley-jar = /your/path/to/berkeleyaligner-1.1/berkeleyaligner.jar
220
+ #berkeley-java-options = "-server -mx30000m -ea"
221
+ #berkeley-training-options = "-Main.iters 5 5 -EMWordAligner.numThreads 8"
222
+ #berkeley-process-options = "-EMWordAligner.numThreads 8"
223
+ #berkeley-posterior = 0.5
224
+
225
+ ### if word alignment should be skipped,
226
+ # point to word alignment files
227
+ #
228
+ #word-alignment = $working-dir/model/aligned.1
229
+
230
+ ### create a bilingual concordancer for the model
231
+ #
232
+ #biconcor = $moses-script-dir/ems/biconcor/biconcor
233
+
234
+ ### lexicalized reordering: specify orientation type
235
+ # (default: only distance-based reordering model)
236
+ #
237
+ lexicalized-reordering = msd-bidirectional-fe
238
+
239
+ ### hierarchical rule set
240
+ #
241
+ #hierarchical-rule-set = true
242
+
243
+ ### settings for rule extraction
244
+ #
245
+ #extract-settings = ""
246
+
247
+ ### unknown word labels (target syntax only)
248
+ # enables use of unknown word labels during decoding
249
+ # label file is generated during rule extraction
250
+ #
251
+ #use-unknown-word-labels = true
252
+
253
+ ### if phrase extraction should be skipped,
254
+ # point to stem for extract files
255
+ #
256
+ # extracted-phrases =
257
+
258
+ ### settings for rule scoring
259
+ #
260
+ score-settings = "--GoodTuring"
261
+
262
+ ### include word alignment in phrase table
263
+ #
264
+ #include-word-alignment-in-rules = yes
265
+
266
+ ### if phrase table training should be skipped,
267
+ # point to phrase translation table
268
+ #
269
+ # phrase-translation-table =
270
+
271
+ ### if reordering table training should be skipped,
272
+ # point to reordering table
273
+ #
274
+ # reordering-table =
275
+
276
+ ### if training should be skipped,
277
+ # point to a configuration file that contains
278
+ # pointers to all relevant model files
279
+ #
280
+ #config =
281
+
282
+ #####################################################
283
+ ### TUNING: finding good weights for model components
284
+
285
+ [TUNING]
286
+
287
+ ### instead of tuning with this setting, old weights may be recycled
288
+ # specify here an old configuration file with matching weights
289
+ #
290
+ weight-config = $toy-data/weight.ini
291
+
292
+ ### tuning script to be used
293
+ #
294
+ tuning-script = $moses-script-dir/training/mert-moses.pl
295
+ tuning-settings = "-mertdir $moses-src-dir/mert"
296
+
297
+ ### specify the corpus used for tuning
298
+ # it should contain 1000s of sentences
299
+ #
300
+ #input-sgm =
301
+ #raw-input =
302
+ #tokenized-input =
303
+ #factorized-input =
304
+ #input =
305
+ #
306
+ #reference-sgm =
307
+ #raw-reference =
308
+ #tokenized-reference =
309
+ #factorized-reference =
310
+ #reference =
311
+
312
+ ### size of n-best list used (typically 100)
313
+ #
314
+ nbest = 100
315
+
316
+ ### ranges for weights for random initialization
317
+ # if not specified, the tuning script will use generic ranges
318
+ # it is not clear, if this matters
319
+ #
320
+ # lambda =
321
+
322
+ ### additional flags for the filter script
323
+ #
324
+ filter-settings = ""
325
+
326
+ ### additional flags for the decoder
327
+ #
328
+ decoder-settings = ""
329
+
330
+ ### if tuning should be skipped, specify this here
331
+ # and also point to a configuration file that contains
332
+ # pointers to all relevant model files
333
+ #
334
+ #config =
335
+
336
+ #########################################################
337
+ ## RECASER: restore case, this part only trains the model
338
+
339
+ [RECASING]
340
+
341
+ #decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm
342
+
343
+ ### training data
344
+ # raw input needs to be still tokenized,
345
+ # also also tokenized input may be specified
346
+ #
347
+ #tokenized = [LM:europarl:tokenized-corpus]
348
+
349
+ # recase-config =
350
+
351
+ #lm-training = $srilm-dir/ngram-count
352
+
353
+ #######################################################
354
+ ## TRUECASER: train model to truecase corpora and input
355
+
356
+ [TRUECASER]
357
+
358
+ ### script to train truecaser models
359
+ #
360
+ trainer = $moses-script-dir/recaser/train-truecaser.perl
361
+
362
+ ### training data
363
+ # data on which truecaser is trained
364
+ # if no training data is specified, parallel corpus is used
365
+ #
366
+ # raw-stem =
367
+ # tokenized-stem =
368
+
369
+ ### trained model
370
+ #
371
+ # truecase-model =
372
+
373
+ ######################################################################
374
+ ## EVALUATION: translating a test set using the tuned system and score it
375
+
376
+ [EVALUATION]
377
+
378
+ ### additional flags for the filter script
379
+ #
380
+ #filter-settings = ""
381
+
382
+ ### additional decoder settings
383
+ # switches for the Moses decoder
384
+ #
385
+ decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000"
386
+
387
+ ### specify size of n-best list, if produced
388
+ #
389
+ #nbest = 100
390
+
391
+ ### multiple reference translations
392
+ #
393
+ #multiref = yes
394
+
395
+ ### prepare system output for scoring
396
+ # this may include detokenization and wrapping output in sgm
397
+ # (needed for nist-bleu, ter, meteor)
398
+ #
399
+ detokenizer = "$moses-script-dir/tokenizer/detokenizer.perl -l $output-extension"
400
+ #recaser = $moses-script-dir/recaser/recase.perl
401
+ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension"
402
+ #output-sgm =
403
+
404
+ ### BLEU
405
+ #
406
+ nist-bleu = $moses-script-dir/generic/mteval-v12.pl
407
+ nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c"
408
+ #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
409
+ #ibm-bleu =
410
+
411
+ ### TER: translation error rate (BBN metric) based on edit distance
412
+ # not yet integrated
413
+ #
414
+ # ter =
415
+
416
+ ### METEOR: gives credit to stem / worknet synonym matches
417
+ # not yet integrated
418
+ #
419
+ # meteor =
420
+
421
+ ### Analysis: carry out various forms of analysis on the output
422
+ #
423
+ analysis = $moses-script-dir/ems/support/analysis.perl
424
+ #
425
+ # also report on input coverage
426
+ analyze-coverage = yes
427
+ #
428
+ # also report on phrase mappings used
429
+ report-segmentation = yes
430
+ #
431
+ # report precision of translations for each input word, broken down by
432
+ # count of input word in corpus and model
433
+ #report-precision-by-coverage = yes
434
+ #
435
+ # further precision breakdown by factor
436
+ #precision-by-coverage-factor = pos
437
+
438
+ [EVALUATION:test]
439
+
440
+ ### input data
441
+ #
442
+ input-sgm = $toy-data/test-src.$input-extension.sgm
443
+ # raw-input =
444
+ # tokenized-input =
445
+ # factorized-input =
446
+ # input =
447
+
448
+ ### reference data
449
+ #
450
+ reference-sgm = $toy-data/test-ref.$output-extension.sgm
451
+ # raw-reference =
452
+ # tokenized-reference =
453
+ # reference =
454
+
455
+ ### analysis settings
456
+ # may contain any of the general evaluation analysis settings
457
+ # specific setting: base coverage statistics on earlier run
458
+ #
459
+ #precision-by-coverage-base = $working-dir/evaluation/test.analysis.5
460
+
461
+ ### wrapping frame
462
+ # for nist-bleu and other scoring scripts, the output needs to be wrapped
463
+ # in sgm markup (typically like the input sgm)
464
+ #
465
+ wrapping-frame = $input-sgm
466
+
467
+ ##########################################
468
+ ### REPORTING: summarize evaluation scores
469
+
470
+ [REPORTING]
471
+
472
+ ### currently no parameters for reporting section
473
+
mosesdecoder/cruise-control/create-binary.perl ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env perl
2
+
3
+ use strict;
4
+
5
+ my $HOME = $ENV{"HOME"};
6
+ my $HOSTNAME = "s0565741\@thor.inf.ed.ac.uk";
7
+
8
+ my $sriPath = $ARGV[0];
9
+
10
+ my $cmd;
11
+
12
+ # what machine
13
+ my $machine = `uname`;
14
+ chomp($machine);
15
+
16
+ # COMPILE
17
+ $cmd = "git checkout master && git pull";
18
+ print STDERR "Executing: $cmd \n";
19
+ system($cmd);
20
+
21
+ $cmd = "make -f contrib/Makefiles/install-dependencies.gmake && ./compile.sh --without-tcmalloc";
22
+ print STDERR "Executing: $cmd \n";
23
+ system($cmd);
24
+
25
+ #ZIP
26
+ if ($machine eq "Darwin") {
27
+ $machine = "mac";
28
+ }
29
+
30
+ $cmd = "mkdir -p mt-tools/moses && mv bin lib mt-tools/moses";
31
+ print STDERR "Executing: $cmd \n";
32
+ system($cmd);
33
+
34
+ $cmd = "tar -zcvf $machine.tgz mt-tools";
35
+ print STDERR "Executing: $cmd \n";
36
+ system($cmd);
37
+
38
+ # UPLOAD
39
+ my $date = `date "+%F"`;
40
+ chomp($date);
41
+
42
+ my $targetDir = "/fs/thor1/hieu/binaries/$date/";
43
+ print STDERR "Directory=$targetDir\n";
44
+
45
+ $cmd = "ssh $HOSTNAME mkdir -p $targetDir";
46
+ print STDERR "Executing: $cmd \n";
47
+ system($cmd);
48
+
49
+ $cmd = "rsync -rv --delete $machine.tgz $HOSTNAME:$targetDir";
50
+ print STDERR "Executing: $cmd \n";
51
+ system($cmd);
52
+
53
+ $cmd = "rm $machine.tgz";
54
+ print STDERR "Executing: $cmd \n";
55
+ system($cmd);
mosesdecoder/cruise-control/example.config ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Example cruise-control configuration, sourced by
# test_all_new_commits.sh before each run.

# the tmp directory (falls back to ./tmp if it does not exist)
MCC_TEMPDIR="/disk4/cruise-control/cruise-control/tmp/"

# where logs are saved (brief.log plus per-commit logs under logs/CONFIGNAME/)
MCC_LOGDIR="/disk4/cruise-control/cruise-control/"

# repository that will be checked out
MCC_GITREPO="https://github.com/moses-smt/mosesdecoder.git"

# arguments passed to Moses configure (bjam) for the main build
MCC_CONFIGURE_ARGS=" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "

# Build-only smoke tests: each entry drops exactly one optional
# dependency (SRILM, IRSTLM, DALM, CMPH, xmlrpc-c respectively) so a
# commit that silently requires one of them is caught.
ALTERNATIVE_CONFIGURE_ARGS=(
" --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
" --with-srilm=/disk4/cruise-control/srilm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-boost=/disk4/cruise-control/boost_1_55_0 --with-xmlrpc-c=/disk4/cruise-control/xmlrpc-c -j8 "
" --with-srilm=/disk4/cruise-control/srilm --with-irstlm=/disk4/cruise-control/irstlm --with-dalm=/disk4/cruise-control/DALM --with-cmph=/disk4/cruise-control/cmph-2.0 --with-boost=/disk4/cruise-control/boost_1_55_0 --no-xmlrpc-c -j8 "
)

# list of branches to be checked (space-separated git refs)
MCC_SCAN_BRANCHES="origin/master"

# run full training/eval pipeline using EMS?
MCC_RUN_EMS="yes"
mosesdecoder/cruise-control/shorten_info.pl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/perl
# Print the leading portion of a "git show" dump: everything up to (but
# not including) the first "diff --git" line, i.e. the commit header and
# message without the patch body.

use strict;
use warnings;

while (my $line = <>) {
    # Stop as soon as the patch section starts.
    last if $line =~ /^diff --git/;
    print $line;
}
mosesdecoder/cruise-control/test_all_new_commits.sh ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# given a config file runs tests on all untested commits of the scanned branches
# storing detailed logs to logs/CONFIGNAME/commit
# and extending the file brief.log
#
# A commit is assumed to be tested, if logs/CONFIGNAME/commit exists
#
# Ondrej Bojar, Ales Tamchyna, 2011

function warn() { echo "$@" >&2; }
function die() { echo "$@" >&2; exit 1; }
set -o pipefail # safer pipes

# First argument is the config file; its basename (minus ".config")
# names the log subdirectory for this configuration.
configf="$1"
[ -e "$configf" ] || die "usage: $0 configfile"
configname=$(basename $configf | sed 's/\.config$//')

source "$configf"

# beautifier
# NOTE(review): this clones a fresh checkout, runs the code beautifier
# and pushes the result straight back upstream on every run -- a write
# side effect unrelated to testing; confirm this is intended here.
git clone git@github.com:moses-smt/mosesdecoder.git /tmp/moses
cd /tmp/moses
./scripts/other/beautify.py --format --skip-perltidy
git commit -am "daily automatic beautifier"
git push
rm -rf /tmp/moses
cd -

[ -z "$MCC_SCAN_BRANCHES" ] \
  && die "Bad config $configf; does not define MCC_SCAN_BRANCHES"

# use the given tempdir or make subdir tmp here
USE_TEMPDIR=$MCC_TEMPDIR
[ -d "$USE_TEMPDIR" ] || USE_TEMPDIR=./tmp

LOGDIR=$MCC_LOGDIR
[ -d "$LOGDIR" ] || LOGDIR=.

# ensure full path for logdir
LOGDIR=$(readlink -f "$LOGDIR")
[ -d "$LOGDIR" ] || die "Fatal: confusing readlink for $LOGDIR"

# this is where moses is cloned into
WORKDIR=$MCC_WORKDIR
[ -d "$WORKDIR" ] || WORKDIR=$USE_TEMPDIR/workdir

# remember the launch directory; used later to locate the config file
# and helper scripts after we cd into $WORKDIR
MYDIR=$(pwd)

# this is where moses is taken from
GITREPO="$MCC_GITREPO"
[ -n "$GITREPO" ] || GITREPO=/home/obo/moses-at-google-code

# location of moses regression test data archive (assumes url at the moment)
REGTEST_ARCHIVE="$MCC_REGTEST_ARCHIVE"
[ -n "$REGTEST_ARCHIVE" ] \
  || REGTEST_ARCHIVE="git://github.com/moses-smt/moses-regression-tests.git"

# clone on first run, fetch on subsequent runs
if [ ! -d "$WORKDIR" ]; then
  mkdir "$WORKDIR" || die "Failed to create workdir $WORKDIR"
  warn "Cloning $GITREPO into $WORKDIR"
  git clone $GITREPO $WORKDIR \
    || die "Failed to git clone into workdir $WORKDIR"
else
  ( cd "$WORKDIR" && git fetch ) \
    || die "Failed to update our clone at $WORKDIR"
fi

mkdir -p $LOGDIR/logs/$configname \
  || die "Failed to create dir $LOGDIR/logs/$configname"
70
+
71
#### How is one test performed
# run_single_test <commit-sha>
# Checks out the given commit, builds it under every configured option
# set, runs the regression tests and (optionally) a full EMS pipeline,
# then records the outcome in a detailed per-commit log and as one line
# in brief.log.  Returns 0 on success, 1 on failure.
function run_single_test () {
  commit=$1
  first_char=$(echo $commit | grep -o '^.')
  longlog="$LOGDIR/logs/$configname/$first_char/$commit.log"
  warn "Testing commit $commit"

  # Get the version of this script (svn, git, or unknown)
  ccversion=$(svnversion 2>/dev/null)
  [ ! -z "$ccversion" ] || ccversion=$(git show 2>&1 | head -n 1)
  [ ! -z "$ccversion" ] || ccversion="unknown"

  # Create log header with computer details:
  echo "#### Moses Cruise Control Log for commit $commit" > $longlog
  date >> $longlog
  echo "## Cruise Control version" >> $longlog
  echo $ccversion >> $longlog
  echo "## Parameters" >> $longlog
  cat $MYDIR/$configf >> $longlog
  echo "## Environment" >> $longlog
  uname -a >> $longlog
  env >> $longlog

  git checkout --force $commit 2>/dev/null || die "Failed to checkout commit $commit"

  err=""

  cd regression-testing

  # download (or update) data for regression tests if necessary
  regtest_dir=$PWD/moses-reg-test-data
  if [ -e $regtest_dir ]; then
    (cd $regtest_dir; git pull) &> /dev/null ||
      die "Failed to update regression testing data"
  else
    git clone $REGTEST_ARCHIVE $regtest_dir &> /dev/null ||
      die "Failed to clone regression testing data"
  fi
  cd ..

  # smoke-test the build under each alternative configuration
  echo "## test build with different configurations" >> $longlog
  for configArgs in "${ALTERNATIVE_CONFIGURE_ARGS[@]}"
  do
    echo "building with args: $configArgs" >> $longlog
    ./bjam clean -a $configArgs >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
  done

  echo "## ./bjam clean" >> $longlog
  ./bjam clean -a $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"

  echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
  if [ -z "$err" ]; then
    ./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
  fi

  echo "## regression tests" >> $longlog
  if [ -z "$err" ]; then
    ./bjam $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || err="regression tests"
  fi

  if [ -z "$err" ] && [ "$MCC_RUN_EMS" = "yes" ]; then
    echo "## EMS" >> $longlog
    if [ ! -f "giza-pp.ok" ]; then # fetch & compile Giza++
      svn checkout http://giza-pp.googlecode.com/svn/trunk/ giza-pp \
        || die "Failed to fetch Giza++"
      cd giza-pp && make || die "Failed to compile Giza++"
      mkdir -p bin
      ln -s ../GIZA++-v2/GIZA++ ../GIZA++-v2/snt2cooc.out ../mkcls-v2/mkcls bin/
      cd ..
      touch giza-pp.ok
    fi
    # BUGFIX: the original line contained a stray double quote
    #   ./bjam $MCC_CONFIGURE_ARGS" || err="bjam"
    # which unbalanced the script's quoting from this point onward.
    ./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
    srilm_dir=$(echo $MCC_CONFIGURE_ARGS | sed -r 's/.*--with-srilm=([^ ]+) .*/\1/')
    mach_type=$($srilm_dir/sbin/machine-type)
    mkdir -p "$WORKDIR/ems_workdir"
    rm -rf "$WORKDIR/ems_workdir/"* # clean any previous experiments
    # instantiate the EMS config template for this run
    cat $MYDIR/config.ems \
      | sed \
        -e "s#WORKDIR#$WORKDIR#" \
        -e "s#SRILMDIR#$srilm_dir#" \
        -e "s#MACHINE_TYPE#$mach_type#" \
      > ./config.ems
    scripts/ems/experiment.perl \
      -no-graph -exec -config $(pwd)/config.ems &>> $longlog \
      || die "Running EMS failed"
    # the final REPORTING step leaves a DONE marker when the pipeline succeeded
    [ -f $WORKDIR/ems_workdir/steps/1/REPORTING_report.1.DONE ] || err="ems"
  fi

  echo "## Finished" >> $longlog
  date >> $longlog

  if [ -z "$err" ]; then
    status="OK"
  else
    git reset --hard HEAD
    status="FAIL:$err"
  fi
  echo "## Status: $status" >> $longlog

  nicedate=$(date +"%Y%m%d-%H%M%S")
  # NOTE(review): the brief.log fields were concatenated with no
  # separators in the original (most likely tabs lost in transit);
  # emit them tab-separated so the file stays machine-parseable --
  # confirm against whatever consumes brief.log.
  printf '%s\t%s\t%s\t%s\t%s\n' "$commit" "$status" "$configname" "$ccversion" "$nicedate" \
    >> "$LOGDIR/brief.log"

  if [ -z "$err" ]; then
    touch "$LOGDIR/logs/$configname/$first_char/$commit.OK"
  else
    return 1;
  fi
}
189
+
190
cd $WORKDIR || die "Failed to chdir to $WORKDIR"

# update the revision lists for all watched branches
for i in $MCC_SCAN_BRANCHES; do
  git rev-list $i > "$LOGDIR/logs/$configname/$(echo -n $i | sed 's/^.*\///').revlist"
done

# create info files for new commits
# rev-list emits newest-first, so we can stop at the first commit that
# already has an .info file -- everything older was handled earlier
for i in $(git rev-list $MCC_SCAN_BRANCHES); do
  first_char=$(echo $i | grep -o '^.')
  mkdir -p "$LOGDIR/logs/$configname/$first_char"
  [ -f "$LOGDIR/logs/$configname/$first_char/$i.info" ] && break;
  git show $i | $MYDIR/shorten_info.pl > "$LOGDIR/logs/$configname/$first_char/$i.info"
done

#### Main loop over all commits
# Walk each branch newest-first; test untested commits until one passes
# or a previously tested commit is reached.  Note the while body runs in
# a pipeline subshell, so "break" ends only this branch's scan.
for i in $MCC_SCAN_BRANCHES; do
  warn "On branch $i"
  git rev-list $i \
  | while read commit; do
    first_char=$(echo $commit | grep -o '^.')
    test_done="$LOGDIR/logs/$configname/$first_char/$commit.log"
    if [ ! -e "$test_done" ]; then
      run_single_test $commit && warn "Commit $commit test ok, stopping" && break
      warn "Commit $commit test failed, continuing"
    else
      warn "Reached a previously tested commit ($commit), stopping"
      break
    fi
  done
done
mosesdecoder/cruise-control/web/html_templates.php ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
// Emit the opening HTML boilerplate: head with charset meta and the
// given page title, then open the body.
function show_header($title)
{
    echo "\n<html>\n<head>\n"
        . "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html;charset=utf-8\">\n"
        . "<title>$title</title>\n"
        . "</head><body>";
}
12
+
13
// Emit an <hN> heading; $size selects the heading level (default 1).
function show_heading($text, $size = 1)
{
    printf("\n<h%d>%s</h%d>", $size, $text, $size);
}
18
+
19
// Emit the closing HTML boilerplate.
function show_footer()
{
    // BUGFIX: the document was closed with "<html>" instead of the
    // proper closing tag "</html>".
    echo "\n</body>\n</html>";
}
25
+
26
// Close the table opened by start_table().
function end_table()
{
    printf("\n</table>");
}
31
+
32
// Emit one <tr> whose cells are the elements of $data; $odd toggles the
// alternating row background colour.
// BUGFIX: $data was a required parameter declared after the optional
// $odd, which is deprecated as of PHP 8.0; it now defaults to an empty
// array (an empty row), keeping all existing two-argument calls intact.
function array_to_table_row($odd = true, $data = array())
{
    $bgcolor = $odd ? " bgcolor=\"#ccccdd\"" : "";
    echo "\n<tr$bgcolor>";
    // iterate by value -- the original by-reference loop left a
    // dangling reference behind with no benefit for read-only use
    foreach ($data as $item) {
        echo "\n<td style=\"padding-left:8px; padding-right:8px\">$item</td>";
    }
    echo "\n</tr>";
}
44
+
45
// Open the commit table: vertical rules between columns only.
function start_table()
{
    echo "\n" . '<table rules="cols" frame="vsides">';
}
50
+
51
// Open a <form> submitting to $action via $method (default "get").
function start_form($action, $method = "get")
{
    printf("\n<form action=\"%s\" method=\"%s\">", $action, $method);
}
56
+
57
// Close the form opened by start_form().
function end_form()
{
    printf("\n</form>");
}
62
+
63
// Render a <select> element named $name from the list $items.  The
// entry loosely equal to $selected is pre-selected; $onchange_hdl, when
// non-empty, is attached as the onchange handler (used e.g. to
// auto-submit the enclosing form).
function show_select_box($items, $name, $selected = "", $onchange_hdl = "")
{
    $onchange = $onchange_hdl ? " onchange=\"$onchange_hdl\"" : "";
    echo "\n<select name=\"$name\"$onchange>";
    // iterate by value -- the original by-reference loop served no purpose
    foreach ($items as $item) {
        // BUGFIX: 'selected="yes"' is non-conforming HTML; use the
        // standard 'selected="selected"' form.
        $item_selected = $selected == $item ? " selected=\"selected\"" : "";
        echo "\n<option value=\"$item\"$item_selected>$item</option>";
    }
    echo "\n</select>";
}
76
+
77
// Build an anchor tag linking to $url with link text $label; when
// $new_window is true the link opens in a new tab/window.
function get_href($label, $url, $new_window = false)
{
    $attributes = "href=\"$url\"";
    if ($new_window) {
        $attributes .= " target=\"_blank\"";
    }
    return "<a $attributes>$label</a>";
}
82
+
83
// Display $msg as a prominent red warning paragraph.
function warn($msg)
{
    printf('<p><font color="red"><b>%s</b></font>', $msg);
}
87
+
88
// The URL of the current request (path plus query string), as reported
// by the web server.
function get_current_url()
{
    return $_SERVER['REQUEST_URI'];
}
92
+
93
// Return $url with query-string parameter $var set to $value: any
// existing occurrence is removed (via cut_var) and the new pair is
// appended with the appropriate separator ("", "&" or "?").
// Note: $value is inserted verbatim; callers pass trusted values here,
// but anything user-supplied should be urlencode()d first.
function set_var($url, $var, $value)
{
    $url = cut_var($url, $var);
    // BUGFIX: use substr() so an empty $url no longer triggers an
    // illegal string offset access ($url[strlen($url) - 1]).
    if (substr($url, -1) === "?") {
        $url .= "$var=$value";
    } elseif (strpos($url, "?") !== false) {
        $url .= "&$var=$value";
    } else {
        $url .= "?$var=$value";
    }
    return $url;
}
105
+
106
// Remove parameter $var (and its value) from the query string of $url.
function cut_var($url, $var)
{
    // BUGFIX: escape $var so regex metacharacters in a parameter name
    // cannot break (or silently broaden) the pattern.
    // XXX there is probably a cleaner solution for this
    return preg_replace('/&?' . preg_quote($var, '/') . '=[^&]+/', '', $url);
}
111
+
112
+ ?>
mosesdecoder/cruise-control/web/index.php ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<?php

// Moses Cruise Control front page: shows the current status of the
// master branch and a paged table of tested commits for the selected
// branch.  Relies on Branch / get_all_branch_names from log_wrapper.php
// and the HTML helpers from html_templates.php.

include("html_templates.php");
include("log_wrapper.php");

// commits per page and base URL for commit links on GitHub
const SHOW_ITEMS = 50;
const GITHUB_LINK = "https://github.com/moses-smt/mosesdecoder/commit/";

show_header("Moses Cruise Control");
echo "\n<center>\n";

show_heading("Moses Cruise Control");
echo "\n</center>\n";

// show current status of 'master' branch (its most recent commit)
$master_branch = new Branch("master");
$last_commit = $master_branch->get_next_commit();
$last_commit->read_log();
show_heading("Current status of master: " . colorize_status($last_commit->get_status()), 3);
$branch_name = ! empty($_GET["branch"]) ? $_GET["branch"] : "master";

// check that user wants to see a valid branch
$all_branches = get_all_branch_names();
if (! in_array($branch_name, $all_branches)) {
    warn("Branch '$branch_name' not found (only branches with some tests done can be viewed)");
    $branch_name = "master";
}

// branch select box (auto-submits on change)
start_form("", "get");
echo "<p>Showing log of branch: ";
show_select_box($all_branches, "branch", $branch_name, "submit()");
end_form();

$branch = new Branch("$branch_name");
$start_with = ! empty($_GET["start"]) ? $_GET["start"] : 0;
$branch->set_line($start_with);

show_navigation($start_with);

// table of commits
start_table();
array_to_table_row(true, array("<b>Commit Link</b>", "<b>Status</b>", "<b>Full Log</b>",
    "<b>Timestamp</b>", "<b>Author</b>", "<b>Commit Message</b>" ));
for ($i = 0; $i < SHOW_ITEMS; $i++) {
    $last_commit = $branch->get_next_commit();

    if ( $last_commit->get_name() == "" ) {
        // BUGFIX: array_to_table_row() takes ($odd, $data); the original
        // passed the data array as the first argument and omitted the
        // required $data parameter entirely -- a fatal error whenever
        // the end of the log was reached.
        array_to_table_row(true, array("=== End of log ==="));
        break;
    }
    $last_commit->read_log();
    $last_commit->read_info();

    array_to_table_row(($i % 2 == 1),
        array( get_href(substr($last_commit->get_name(), 0, 10) . "...", GITHUB_LINK . $last_commit->get_name(), true),
            colorize_status($last_commit->get_status()),
            $last_commit->was_tested() ? get_href("Log", $last_commit->get_log_file(), true) : "N/A",
            $last_commit->get_timestamp(),
            $last_commit->get_author(),
            substr($last_commit->get_message(), 0, 30) . (strlen($last_commit->get_message()) > 30 ? "..." : "")));
}

end_table();

show_navigation($start_with);
show_footer();

// HTML ends here
70
+
71
// Wrap a test status string in a coloured <font> tag: green for
// statuses starting with "O" (OK), red for "F" (FAIL:...), and amber
// for anything else (e.g. untested).
function colorize_status($status)
{
    $first = substr(strtolower($status), 0, 1);
    if ($first === "o") {
        $color = "green";
    } elseif ($first === "f") {
        $color = "red";
    } else {
        $color = "#FFDD00";
    }
    return "<font color=\"$color\"><b>$status</b></font>";
}
85
+
86
// Render the Previous/Next paging links for the commit table.
// "Previous" is plain text (not a link) on the first page; the links
// rewrite the "start" query parameter of the current URL.
function show_navigation($start_with)
{
    start_form("", "get");

    if ($start_with > 0) {
        $prev_url = set_var(get_current_url(), "start", max(0, $start_with - SHOW_ITEMS));
        echo get_href("<p>Previous", $prev_url);
    } else {
        echo "Previous";
    }
    echo " ";

    $next_url = set_var(get_current_url(), "start", $start_with + SHOW_ITEMS);
    echo get_href("Next", $next_url);

    end_form();
}
100
+
101
+ ?>