sleepyhead111 committed on
Commit
3f81909
·
verified ·
1 Parent(s): 7c09f21

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mosesdecoder/contrib/arrow-pipelines/README +58 -0
  2. mosesdecoder/contrib/arrow-pipelines/documentation/training-pipeline/moses-pypeline.dia +0 -0
  3. mosesdecoder/contrib/arrow-pipelines/pcl/Makefile +23 -0
  4. mosesdecoder/contrib/arrow-pipelines/pcl/components/Makefile +24 -0
  5. mosesdecoder/contrib/arrow-pipelines/pcl/components/src_trg_tokeniser.cfg +10 -0
  6. mosesdecoder/contrib/arrow-pipelines/pcl/components/src_trg_tokeniser.pcl +40 -0
  7. mosesdecoder/contrib/arrow-pipelines/pcl/components/translation_model_training.cfg +15 -0
  8. mosesdecoder/contrib/arrow-pipelines/pcl/components/translation_model_training.pcl +70 -0
  9. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/Makefile +14 -0
  10. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/__init__.py +0 -0
  11. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/cleanup/__init__.py +0 -0
  12. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/cleanup/cleanup.py +129 -0
  13. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/__init__.py +0 -0
  14. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/data_split.cfg +7 -0
  15. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/data_split.py +144 -0
  16. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/test_data/data.de +50 -0
  17. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/test_data/data.en +50 -0
  18. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/irstlm_build/__init__.py +0 -0
  19. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/irstlm_build/irstlm_build.py +117 -0
  20. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/mert/__init__.py +0 -0
  21. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/mert/mert.py +98 -0
  22. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/model_training/__init__.py +0 -0
  23. mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/model_training/model_training.py +103 -0
  24. mosesdecoder/contrib/arrow-pipelines/pcl/training_pipeline.cfg +21 -0
  25. mosesdecoder/contrib/arrow-pipelines/pcl/training_pipeline.pcl +117 -0
  26. mosesdecoder/contrib/checkplf/Makefile +2 -0
  27. mosesdecoder/contrib/lmserver/Makefile.in +645 -0
  28. mosesdecoder/contrib/lmserver/daemon.c +68 -0
  29. mosesdecoder/contrib/lmserver/lmserver.c +2140 -0
  30. mosesdecoder/contrib/lmserver/missing +367 -0
  31. mosesdecoder/contrib/lmserver/srilm.h +8 -0
  32. mosesdecoder/contrib/tmcombine/README.md +88 -0
  33. mosesdecoder/contrib/tmcombine/argparse.py +2382 -0
  34. mosesdecoder/contrib/tmcombine/test/model3/model/lex.counts.e2f +8 -0
  35. mosesdecoder/contrib/tmcombine/test/model3/model/lex.counts.f2e +8 -0
  36. mosesdecoder/contrib/tmcombine/test/model3/model/lex.e2f +8 -0
  37. mosesdecoder/contrib/tmcombine/test/model3/model/lex.f2e +8 -0
  38. mosesdecoder/contrib/tmcombine/test/model3/model/phrase-table +8 -0
  39. mosesdecoder/contrib/tmcombine/test/model5/model/lex.counts.e2f +8 -0
  40. mosesdecoder/contrib/tmcombine/test/model5/model/lex.counts.f2e +8 -0
  41. mosesdecoder/contrib/tmcombine/test/model5/model/lex.e2f +8 -0
  42. mosesdecoder/contrib/tmcombine/test/model5/model/lex.f2e +8 -0
  43. mosesdecoder/contrib/tmcombine/test/model5/model/phrase-table +8 -0
  44. mosesdecoder/contrib/tmcombine/test/model6/model/lex.counts.e2f +8 -0
  45. mosesdecoder/contrib/tmcombine/test/model6/model/lex.counts.f2e +8 -0
  46. mosesdecoder/contrib/tmcombine/test/model6/model/lex.e2f +8 -0
  47. mosesdecoder/contrib/tmcombine/test/model6/model/lex.f2e +8 -0
  48. mosesdecoder/contrib/tmcombine/test/model6/model/phrase-table +5 -0
  49. mosesdecoder/contrib/tmcombine/test/phrase-table_test1 +8 -0
  50. mosesdecoder/contrib/tmcombine/test/phrase-table_test10 +9 -0
mosesdecoder/contrib/arrow-pipelines/README ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Arrow Based Moses Training Pipeline
2
+ ===================================
3
+
4
+ This demonstration implements a training pipeline that is shown in the Dia diagram in documentation/training-pipeline/moses-pypeline.dia.
5
+
6
+ The demo has been tested with:
7
+
8
+ - Moses v1.0
9
+ - Giza++ v1.0.7
10
+ - IRSTLM v5.70.04
11
+
12
+
13
+ Setup
14
+ -----
15
+
16
+ To use the demonstration you must first initialise the git submodules for this clone. Return to the top level directory and issue the following command:
17
+
18
+ $ git submodule update --init --recursive
19
+
20
+ This will clone the PCL submodule, available on GitHub (git://github.com/ianj-als/pcl.git), and the Pypeline submodule, also available on GitHub (git://github.com/ianj-als/pypeline.git).
21
+
22
+ Return to the arrow-pipelines contrib directory:
23
+
24
+ $ cd contrib/arrow-pipelines
25
+
26
+ To use the PCL compiler and run-time set the following environment variables (assuming Bash shell):
27
+
28
+ $ export PATH=$PATH:`pwd`/python/pcl/src/pclc:`pwd`/python/pcl/src/pcl-run
29
+ $ export PYTHONPATH=$PYTHONPATH:`pwd`/python/pcl/libs/pypeline/src
30
+ $ export PCL_IMPORT_PATH=`pwd`/python/pcl/src/runtime:`pwd`/pcl
31
+
32
+ Three environment variables need to be set before the pipeline can be run:
33
+
34
+ - MOSES_HOME : The directory where Moses has been cloned, or installed,
35
+ - IRSTLM : The installation directory of your IRSTLM, and
36
+ - GIZA_HOME : The installation directory of GIZA++.
37
+
38
+
39
+ Building the example training pipeline
40
+ --------------------------------------
41
+
42
+ $ cd pcl
43
+ $ make
44
+
45
+
46
+ Running the example training pipeline
47
+ -------------------------------------
48
+
49
+ To execute the training pipeline run the following command:
50
+
51
+ $ pcl-run.py training_pipeline
52
+
53
+ Once complete, the output of the pipeline can be found in the following directories:
54
+
55
+ - training/tokenisation
56
+ - training/model
57
+ - training/lm
58
+ - training/mert
mosesdecoder/contrib/arrow-pipelines/documentation/training-pipeline/moses-pypeline.dia ADDED
Binary file (3.53 kB). View file
 
mosesdecoder/contrib/arrow-pipelines/pcl/Makefile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Builds the top-level training pipeline: every .pcl source listed in
# SOURCES is compiled to a .py file of the same base name with pclc.py,
# after the directories in SUBDIRS have been built.
CC = pclc.py
CFLAGS = -i
SOURCES = training_pipeline.pcl
OBJS = $(SOURCES:.pcl=.py)
SUBDIRS = components

# None of these targets names a real file; declaring them phony keeps a
# stray file called e.g. "clean" or "build" from silently disabling them.
.PHONY: all build clean subdirs

all: subdirs build

build: $(OBJS)

%.py: %.pcl
	$(CC) $(CFLAGS) $<

# "|| exit 1" stops the loop at the first failing sub-make. Without it the
# loop's status is that of the last directory only, so an earlier failure
# would go unnoticed as soon as SUBDIRS lists more than one directory.
clean:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir clean || exit 1; \
	done
	rm -f *.py *.pyc *.log *~

subdirs:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir || exit 1; \
	done
mosesdecoder/contrib/arrow-pipelines/pcl/components/Makefile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Builds the composite PCL components: every .pcl source listed in SOURCES
# is compiled to a .py file of the same base name with pclc.py, after the
# directories in SUBDIRS have been built.
CC = pclc.py
CFLAGS = -i
SOURCES = src_trg_tokeniser.pcl translation_model_training.pcl
OBJS = $(SOURCES:.pcl=.py)
SUBDIRS = wrappers

# None of these targets names a real file; declaring them phony keeps a
# stray file called e.g. "clean" or "build" from silently disabling them.
.PHONY: all build clean subdirs

all: subdirs build

build: $(OBJS)

%.py: %.pcl
	$(CC) $(CFLAGS) $<

# "|| exit 1" stops the loop at the first failing sub-make. Without it the
# loop's status is that of the last directory only, so an earlier failure
# would go unnoticed as soon as SUBDIRS lists more than one directory.
clean:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir clean || exit 1; \
	done
	rm -f *.py *.pyc *.log *~

subdirs:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir || exit 1; \
	done
24
+
mosesdecoder/contrib/arrow-pipelines/pcl/components/src_trg_tokeniser.cfg ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [Configuration]
2
+ tokeniser.src.language = en
3
+ tokeniser.src.tokenisation_dir = test_data/src_trg_tokenizer/tokenised
4
+ tokeniser.trg.language = lt
5
+ tokeniser.trg.tokenisation_dir = test_data/src_trg_tokenizer/tokenised
6
+ tokeniser.moses.installation = /opt/moses
7
+
8
+ [Inputs]
9
+ src_filename = test_data/src_trg_tokenizer/cleantrain.en
10
+ trg_filename = test_data/src_trg_tokenizer/cleantrain.lt
mosesdecoder/contrib/arrow-pipelines/pcl/components/src_trg_tokeniser.pcl ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Import all of the components to be composed
3
+ #
4
+ import wrappers.tokenizer.tokenizer as tokeniser
5
+
6
+ #
7
+ # Component definition
8
+ #
9
+ # +---------+ +---------+ +---------+ +---------+
10
+ # src_filename -->+ +--> filename -->+-- src --+--> tokenised_filename -->+---------+--> tokenised_filename -->+ +--> tokenised_src_filename
11
+ # | | | | | | | |
12
+ # trg_filename -->+ +--> filename -->+---------+-------> filename ------->+-- trg --+--> tokenised_filename -->+ +--> tokenised_trg_filename
13
+ # +---------+ +---------+ +---------+ +---------+
14
+ # Config: {language::String, Config: {language::String,
15
+ # tokenisation_dir::String, tokenisation_dir::String,
16
+ # moses_installation_dir::String} moses_installation_dir::String}
17
+ #
18
+ component src_trg_tokeniser
19
+ inputs (src_filename), (trg_filename)
20
+ outputs (tokenised_src_filename), (tokenised_trg_filename)
21
+ configuration tokeniser.src.language,
22
+ tokeniser.src.tokenisation_dir,
23
+ tokeniser.trg.language,
24
+ tokeniser.trg.tokenisation_dir,
25
+ tokeniser.moses.installation
26
+ declare
27
+ src_tokeniser := new tokeniser with
28
+ tokeniser.src.language -> corpus.language,
29
+ tokeniser.src.tokenisation_dir -> working.directory.root,
30
+ tokeniser.moses.installation -> moses.installation
31
+ trg_tokeniser := new tokeniser with
32
+ tokeniser.trg.language -> corpus.language,
33
+ tokeniser.trg.tokenisation_dir -> working.directory.root,
34
+ tokeniser.moses.installation -> moses.installation
35
+ as
36
+ wire (src_filename -> corpus.filename),
37
+ (trg_filename -> corpus.filename) >>>
38
+ (src_tokeniser *** trg_tokeniser) >>>
39
+ wire (corpus.tokenised.filename -> tokenised_src_filename),
40
+ (corpus.tokenised.filename -> tokenised_trg_filename)
mosesdecoder/contrib/arrow-pipelines/pcl/components/translation_model_training.cfg ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Configuration]
2
+ model_training.max_segment_length = 20
3
+ model_training.corpus.development_size = 4500
4
+ model_training.corpus.evaluation_size = 5000
5
+ model_training.src.language = en
6
+ model_training.trg.language = lt
7
+ model_training.method.alignment = grow-diag-final-and
8
+ model_training.method.reordering = msd-bidirectional-fe
9
+ model_training.moses.installation = /opt/moses
10
+ model_training.giza.installation = /opt/moses/giza++-v1.0.7
11
+ model_training.translation_model.dir = test_data/translation_model_training/translation_model
12
+
13
+ [Inputs]
14
+ src_filename = test_data/translation_model_training/cleantrain.en
15
+ trg_filename = test_data/translation_model_training/cleantrain.lt
mosesdecoder/contrib/arrow-pipelines/pcl/components/translation_model_training.pcl ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Import all of the components to be composed
3
+ #
4
+ import wrappers.cleanup.cleanup as cleanup
5
+ import wrappers.data_split.data_split as data_split
6
+ import wrappers.model_training.model_training as model_training
7
+
8
+ #
9
+ # Component definition
10
+ #
11
+ # {cleaned_src_filename, {src_filename, {[devel|eval|train]_src_filename, {src_filename, {moses_ini_file,
12
+ # cleaned_trg_filename} trg_filename} [devel|eval|train]_trg_filename} trg_filename} evaluation_data_filename}
13
+ # | | | | +-------+ |
14
+ # +-------+ | | +-------+ | +-------+ V | Model | {moses_ini_file} +-------+ V
15
+ # | Clean | V V | Data | V | +---------------->+ Train +----------------->+ Merge +----->
16
+ # {src_filename, -->+ +----->+ +------------->+ Split | +-------+ +---+---+
17
+ # trg_filename} | Up | | Split | | +---\ Config: {[src|trg]_language::String, ^
18
+ # +-------+ +-------+ +-------+ | alignment_method::String, |
19
+ # Config: {segment_length::Int} Config: {development_size::Int, | reordering_method::String, |
20
+ # evaluation_size::Int} | giza_installation_dir::String, |
21
+ # | model_directory::String} |
22
+ # \--------------------------------------------/
23
+ #
24
+ component translation_model_training
25
+ inputs src_filename, trg_filename
26
+ outputs evaluation_data_filename, moses_ini_filename
27
+ configuration model_training.max_segment_length,
28
+ model_training.corpus.development_size,
29
+ model_training.corpus.evaluation_size,
30
+ model_training.src.language,
31
+ model_training.trg.language,
32
+ model_training.method.alignment,
33
+ model_training.method.reordering,
34
+ model_training.moses.installation,
35
+ model_training.giza.installation,
36
+ model_training.translation_model.dir
37
+ declare
38
+ cleanup := new cleanup with
39
+ model_training.max_segment_length -> segment_length_limit
40
+ data_split := new data_split with
41
+ model_training.corpus.development_size -> development_data_size,
42
+ model_training.corpus.evaluation_size -> evaluation_data_size
43
+ model_training := new model_training with
44
+ model_training.src.language -> source_language,
45
+ model_training.trg.language -> target_language,
46
+ model_training.method.alignment -> alignment_method,
47
+ model_training.method.reordering -> reordering_method,
48
+ model_training.moses.installation -> moses_installation_dir,
49
+ model_training.giza.installation -> giza_installation_dir,
50
+ model_training.translation_model.dir -> translation_model_directory
51
+ as
52
+ cleanup >>>
53
+ wire cleaned_src_filename -> src_filename,
54
+ cleaned_trg_filename -> trg_filename >>>
55
+ data_split >>>
56
+ wire devel_src_filename -> devel_src_filename,
57
+ eval_src_filename -> evaluation_data_filename,
58
+ train_trg_filename -> _,
59
+ train_src_filename -> _,
60
+ eval_trg_filename -> _,
61
+ devel_trg_filename -> devel_trg_filename >>>
62
+ ((wire devel_src_filename -> src_filename,
63
+ devel_trg_filename -> trg_filename,
64
+ evaluation_data_filename -> _ >>>
65
+ model_training) &&&
66
+ wire evaluation_data_filename -> evaluation_data_filename,
67
+ devel_src_filename -> _,
68
+ devel_trg_filename -> _) >>>
69
+ merge top[moses_ini_filename] -> moses_ini_filename,
70
+ bottom[evaluation_data_filename] -> evaluation_data_filename
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/Makefile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dispatches "all" and "clean" to each wrapper directory listed in SUBDIRS.
SUBDIRS = tokenizer

# None of these targets names a real file; declaring them phony keeps a
# stray file called e.g. "clean" from silently disabling them.
.PHONY: all clean subdirs

all: subdirs

# "|| exit 1" stops the loop at the first failing sub-make. Without it the
# loop's status is that of the last directory only, so an earlier failure
# would go unnoticed as soon as SUBDIRS lists more than one directory.
clean:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir clean || exit 1; \
	done

subdirs:
	for dir in $(SUBDIRS); do \
		$(MAKE) -C $$dir || exit 1; \
	done
14
+
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/cleanup/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/cleanup/cleanup.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_name():
    """Return the name of this component, 'cleanup'."""
    component_name = 'cleanup'
    return component_name
3
+
4
def get_inputs():
    """Return the input names of the component: source then target filename."""
    input_names = ['src_filename']
    input_names.append('trg_filename')
    return input_names
6
+
7
def get_outputs():
    """Return the output names of the component: cleaned source then target."""
    output_names = ['cleaned_src_filename']
    output_names.append('cleaned_trg_filename')
    return output_names
9
+
10
def get_configuration():
    """Return the names of the configuration keys that configure() reads."""
    configuration_keys = ['segment_length_limit']
    return configuration_keys
12
+
13
def configure(args):
    """Map the external configuration onto the component's own settings.

    Reads args['segment_length_limit'] and returns it under the key
    'segment_length', which is what initialise() looks up. A missing key
    raises KeyError.
    """
    segment_length = args['segment_length_limit']
    return {'segment_length': segment_length}
15
+
16
def initialise(config):
    """Build the processing function of the cleanup component.

    config is the dictionary returned by configure(); its 'segment_length'
    entry is the limit applied to both sides of the corpus.

    The returned function takes (a, s). `a` must map 'src_filename' and
    'trg_filename' to the paths of two line-aligned text files; `s` is
    accepted but not used here. Line pairs in which both lines contain
    fewer than `segment_length` space characters are copied to new files
    next to the inputs, named by inserting ".clean" in front of the file
    extension. The function returns a dictionary holding those paths under
    'cleaned_src_filename' and 'cleaned_trg_filename'.

    Raises KeyError for missing configuration or input entries and the
    usual IOError/OSError when a file cannot be opened.
    """
    import os

    def _short(line, limit):
        # Length is measured in space characters, so a line of k tokens
        # separated by single spaces counts as k - 1.
        return line.count(" ") < limit

    def _filter(limit, ifh1, ofh1, ifh2, ofh2):
        # Walk the two files in lock step, one line at a time, so that
        # large corpora are never held in memory as a whole. Like zip(),
        # stop as soon as either file runs out of lines.
        for src_line in ifh1:
            trg_line = ifh2.readline()
            if not trg_line:
                break
            if _short(src_line, limit) and _short(trg_line, limit):
                # Lines keep their own newline; write them through as is.
                ofh1.write(src_line)
                ofh2.write(trg_line)

    def _make_cleaned_filename(filename):
        # splitext only looks at the last path component, so dots in
        # directory names (or a leading "./") no longer end up in the wrong
        # place. A name without extension simply gets ".clean" appended.
        root, extension = os.path.splitext(filename)
        return root + ".clean" + extension

    def _filter_main(a, s):
        # Coerce so that a limit supplied as text (e.g. read from a config
        # file) compares numerically -- NOTE(review): whether the runtime
        # ever passes text here is not visible from this module.
        limit = int(config['segment_length'])
        input_src_filename = a['src_filename']
        input_trg_filename = a['trg_filename']

        # Parenthesised single argument: valid under Python 2 and 3 alike.
        print("Cleanup: Cleaning [%s] and [%s]..." % (input_src_filename, input_trg_filename))

        cleaned_src_filename = _make_cleaned_filename(input_src_filename)
        cleaned_trg_filename = _make_cleaned_filename(input_trg_filename)

        handles = []
        try:
            for path, mode in ((input_src_filename, "r"),
                               (input_trg_filename, "r"),
                               (cleaned_src_filename, "w"),
                               (cleaned_trg_filename, "w")):
                handles.append(open(path, mode))
            (ifh1, ifh2, ofh1, ofh2) = handles

            _filter(limit, ifh1, ofh1, ifh2, ofh2)
        finally:
            # Close whatever was opened, even if a later open() failed.
            for fh in handles:
                fh.close()

        return {'cleaned_src_filename': cleaned_src_filename,
                'cleaned_trg_filename': cleaned_trg_filename}

    return _filter_main
66
+
67
+
68
if __name__ == '__main__':
    # Self-test: writes a small synthetic corpus to temporary files, runs
    # the component on it and compares the result with the expected files.
    import os
    import tempfile
    import test.test as thelp

    from pypeline.helpers.helpers import eval_pipeline


    def _make_temp_file(suffix):
        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Only the path is used, so close the descriptor straight
        # away instead of leaking it.
        (fd, path) = tempfile.mkstemp(suffix = suffix, dir = "/tmp")
        os.close(fd)
        return path


    def _test_main():
        configuration = {'segment_length_limit': 20}

        src_filename = _make_temp_file(".src")
        trg_filename = _make_temp_file(".trg")

        box_eval = {
            'src_filename': src_filename,
            'trg_filename': trg_filename,
            'cleaned_src_file_expected': src_filename + ".expected",
            'cleaned_trg_file_expected': trg_filename + ".expected"}

        try:
            _prep_files(box_eval)
            _run_test(configuration, box_eval)
        finally:
            _cleanup_files(box_eval)


    def _run_test(configuration, box_eval):
        box_config = configure(configuration)
        box = initialise(box_config)

        # NOTE(review): eval_pipeline is assumed to call the component
        # function and hand back its result dictionary -- confirm against
        # pypeline.
        output = eval_pipeline(box, box_eval, box_config)
        try:
            thelp.diff(box_eval['cleaned_src_file_expected'], output['cleaned_src_filename'])
            thelp.diff(box_eval['cleaned_trg_file_expected'], output['cleaned_trg_filename'])
        finally:
            os.unlink(output['cleaned_src_filename'])
            os.unlink(output['cleaned_trg_filename'])


    def _line(line_lengths):
        # One line per requested length, made of the tokens tok0 .. tok<n-1>.
        # A real list is built so the result is the same on Python 2 and 3.
        def _gen_line(tokens):
            return " ".join(["tok" + str(n) for n in range(tokens)])
        return [_gen_line(length) for length in line_lengths]


    def _prep_files(box_eval):
        # Only the fifth pair (17 and 20 tokens) has both sides within the
        # limit of 20, hence the single expected line per side.
        thelp.cat(box_eval['src_filename'], _line([10, 20, 30, 40, 17, 21]))
        thelp.cat(box_eval['trg_filename'], _line([40, 30, 20, 10, 20, 21]))
        thelp.cat(box_eval['cleaned_src_file_expected'], _line([17]))
        thelp.cat(box_eval['cleaned_trg_file_expected'], _line([20]))


    def _cleanup_files(box_eval):
        # Best effort, file by file: one file that is missing or cannot be
        # removed must not stop the remaining ones from being deleted.
        for filename in box_eval.values():
            try:
                os.unlink(filename)
            except OSError:
                pass


    _test_main()
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/data_split.cfg ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [Configuration]
2
+ evaluation_data_size = 7
3
+ development_data_size = 13
4
+
5
+ [Inputs]
6
+ src_filename = test_data/data.en
7
+ trg_filename = test_data/data.de
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/data_split.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_name():
    """Return the name of this component, 'data_split'."""
    component_name = 'data_split'
    return component_name
3
+
4
def get_inputs():
    """Return the input names of the component: source then target filename."""
    input_names = ['src_filename']
    input_names.append('trg_filename')
    return input_names
6
+
7
def get_outputs():
    """Return the output names: a source/target pair per data set.

    The order is development, evaluation, then training, with the source
    name ahead of the target name inside each pair.
    """
    development = ['devel_src_filename', 'devel_trg_filename']
    evaluation = ['eval_src_filename', 'eval_trg_filename']
    training = ['train_src_filename', 'train_trg_filename']
    return development + evaluation + training
11
+
12
def get_configuration():
    """Return the names of the configuration keys that configure() reads."""
    configuration_keys = ['evaluation_data_size']
    configuration_keys.append('development_data_size')
    return configuration_keys
14
+
15
def configure(args):
    """Map the external configuration onto the component's own settings.

    args['evaluation_data_size'] is returned under 'evaluate_size' and
    args['development_data_size'] under 'development_size', the keys that
    initialise() looks up. A missing key raises KeyError.
    """
    return {
        'evaluate_size': args['evaluation_data_size'],
        'development_size': args['development_data_size'],
    }
20
+
21
def initialise(config):
    """Build the processing function of the data_split component.

    config is the dictionary returned by configure(): 'development_size'
    and 'evaluate_size' give the number of line pairs for the development
    and evaluation sets.

    The returned function takes (a, s). `a` must map 'src_filename' and
    'trg_filename' to the paths of two line-aligned text files; `s` is
    accepted but not used here. The first 'development_size' line pairs go
    to the "devel" files, the next 'evaluate_size' pairs to the "eval"
    files and all remaining pairs to the "train" files. Each output is
    written next to its input, named by inserting the data-set name in
    front of the file extension. The function returns a dictionary with the
    six paths under '<set>_src_filename' / '<set>_trg_filename'.

    Raises KeyError for missing configuration or input entries and the
    usual IOError/OSError when a file cannot be opened.
    """
    import itertools
    import os

    def _safe_close(fh):
        if fh is not None:
            fh.close()

    def _paired_lines(ifh1, ifh2):
        # Yield (source, target) line pairs lazily so that large corpora
        # are never held in memory as a whole. Like zip(), stop as soon as
        # either file runs out of lines.
        for l1 in ifh1:
            l2 = ifh2.readline()
            if not l2:
                return
            yield (l1, l2)

    def _copy(size, inp, ofh1, ofh2):
        # A negative size means "everything that is left"; otherwise at
        # most `size` pairs are taken. islice never consumes more pairs
        # than it hands out, so the next data set continues exactly where
        # this one stopped.
        limit = None if size < 0 else size
        for (l1, l2) in itertools.islice(inp, limit):
            # Lines keep their own newline; write them through as is.
            ofh1.write(l1)
            ofh2.write(l2)

    def _make_split_filename(filename, data_set):
        # splitext only looks at the last path component, so dots in
        # directory names (or a leading "./") no longer end up in the wrong
        # place. A name without extension gets "." + data_set appended.
        root, extension = os.path.splitext(filename)
        return root + "." + data_set + extension

    def _splitter_main(a, s):
        (ifh1, ifh2) = (None, None)
        try:
            input_src_filename = a['src_filename']
            input_trg_filename = a['trg_filename']

            ifh1 = open(input_src_filename, "r")
            ifh2 = open(input_trg_filename, "r")
            inp = _paired_lines(ifh1, ifh2)

            result = {}
            # int() so that sizes supplied as text count correctly --
            # NOTE(review): whether the runtime ever passes text here is
            # not visible from this module.
            for (data_set, size) in [('devel', int(config['development_size'])),
                                     ('eval', int(config['evaluate_size'])),
                                     ('train', -1)]:
                output_src_filename = _make_split_filename(input_src_filename, data_set)
                output_trg_filename = _make_split_filename(input_trg_filename, data_set)

                # Close each pair of outputs before moving on to the next
                # data set; previously only the last pair was ever closed.
                (ofh1, ofh2) = (None, None)
                try:
                    ofh1 = open(output_src_filename, "w")
                    ofh2 = open(output_trg_filename, "w")
                    _copy(size, inp, ofh1, ofh2)
                finally:
                    _safe_close(ofh1)
                    _safe_close(ofh2)

                result[data_set + '_src_filename'] = output_src_filename
                result[data_set + '_trg_filename'] = output_trg_filename

            return result
        finally:
            _safe_close(ifh1)
            _safe_close(ifh2)

    return _splitter_main
+
74
+
75
if __name__ == '__main__':
    # Self-test: writes a 50-line synthetic corpus to temporary files,
    # splits it 13 / 7 / rest and compares each part with expected files.
    import os
    import tempfile
    import test.test as thelp

    from pypeline.helpers.helpers import eval_pipeline


    def _make_temp_file(suffix):
        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Only the path is used, so close the descriptor straight
        # away instead of leaking it.
        (fd, path) = tempfile.mkstemp(suffix = suffix, dir = "/tmp")
        os.close(fd)
        return path


    def _unlink_quietly(filename):
        # Best effort: a file that is missing or cannot be removed must not
        # stop the remaining ones from being deleted.
        try:
            os.unlink(filename)
        except OSError:
            pass


    def _test_main():
        configuration = {'evaluation_data_size': 7,
                         'development_data_size': 13}

        src_filename = _make_temp_file(".src")
        trg_filename = _make_temp_file(".trg")

        box_eval = {'src_filename': src_filename,
                    'trg_filename': trg_filename,
                    'devel_src_expected': src_filename + ".devel.expected",
                    'devel_trg_expected': trg_filename + ".devel.expected",
                    'eval_src_expected': src_filename + ".eval.expected",
                    'eval_trg_expected': trg_filename + ".eval.expected",
                    'train_src_expected': src_filename + ".train.expected",
                    'train_trg_expected': trg_filename + ".train.expected"}

        try:
            _prep_files(box_eval)
            _run_test(configuration, box_eval)
        finally:
            _cleanup_files(box_eval)


    def _run_test(configuration, box_eval):
        box_config = configure(configuration)
        box = initialise(box_config)

        # NOTE(review): eval_pipeline is assumed to call the component
        # function and hand back its result dictionary -- confirm against
        # pypeline.
        output = eval_pipeline(box, box_eval, box_config)
        try:
            for data_set in ['devel', 'eval', 'train']:
                for lang in ['src', 'trg']:
                    filename = output[data_set + '_' + lang + '_filename']
                    filename_expected = box_eval[data_set + '_' + lang + '_expected']
                    thelp.diff(filename_expected, filename)
        finally:
            # Remove the generated split files as well; they are not part
            # of box_eval and would otherwise be left behind in /tmp.
            for data_set in ['devel', 'eval', 'train']:
                for lang in ['src', 'trg']:
                    _unlink_quietly(output[data_set + '_' + lang + '_filename'])


    def _line(line_lengths):
        # One line per requested length, made of the tokens tok0 .. tok<n-1>
        # (length 0 gives an empty line). A real list is built so the
        # result is the same on Python 2 and 3.
        def _gen_line(tokens):
            return " ".join(["tok" + str(n) for n in range(tokens)])
        return [_gen_line(length) for length in line_lengths]


    def _prep_files(box_eval):
        thelp.cat(box_eval['src_filename'], _line(range(50)))
        thelp.cat(box_eval['trg_filename'], _line(range(50)))
        #expected output:
        thelp.cat(box_eval['devel_src_expected'], _line(range(0,13)))
        thelp.cat(box_eval['devel_trg_expected'], _line(range(0,13)))
        thelp.cat(box_eval['eval_src_expected'], _line(range(13,20)))
        thelp.cat(box_eval['eval_trg_expected'], _line(range(13,20)))
        thelp.cat(box_eval['train_src_expected'], _line(range(20,50)))
        thelp.cat(box_eval['train_trg_expected'], _line(range(20,50)))


    def _cleanup_files(box_eval):
        for filename in box_eval.values():
            _unlink_quietly(filename)


    _test_main()
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/test_data/data.de ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ tok0
3
+ tok0 tok1
4
+ tok0 tok1 tok2
5
+ tok0 tok1 tok2 tok3
6
+ tok0 tok1 tok2 tok3 tok4
7
+ tok0 tok1 tok2 tok3 tok4 tok5
8
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6
9
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7
10
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8
11
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9
12
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10
13
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11
14
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12
15
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13
16
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14
17
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15
18
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16
19
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17
20
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18
21
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19
22
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20
23
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21
24
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22
25
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23
26
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24
27
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25
28
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26
29
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27
30
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28
31
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29
32
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30
33
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31
34
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32
35
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33
36
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34
37
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35
38
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36
39
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37
40
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38
41
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39
42
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40
43
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41
44
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42
45
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43
46
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44
47
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45
48
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46
49
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46 tok47
50
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46 tok47 tok48
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/data_split/test_data/data.en ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ tok0
3
+ tok0 tok1
4
+ tok0 tok1 tok2
5
+ tok0 tok1 tok2 tok3
6
+ tok0 tok1 tok2 tok3 tok4
7
+ tok0 tok1 tok2 tok3 tok4 tok5
8
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6
9
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7
10
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8
11
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9
12
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10
13
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11
14
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12
15
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13
16
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14
17
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15
18
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16
19
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17
20
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18
21
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19
22
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20
23
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21
24
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22
25
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23
26
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24
27
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25
28
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26
29
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27
30
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28
31
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29
32
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30
33
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31
34
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32
35
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33
36
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34
37
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35
38
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36
39
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37
40
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38
41
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39
42
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40
43
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41
44
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42
45
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43
46
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44
47
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45
48
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46
49
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46 tok47
50
+ tok0 tok1 tok2 tok3 tok4 tok5 tok6 tok7 tok8 tok9 tok10 tok11 tok12 tok13 tok14 tok15 tok16 tok17 tok18 tok19 tok20 tok21 tok22 tok23 tok24 tok25 tok26 tok27 tok28 tok29 tok30 tok31 tok32 tok33 tok34 tok35 tok36 tok37 tok38 tok39 tok40 tok41 tok42 tok43 tok44 tok45 tok46 tok47 tok48
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/irstlm_build/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/irstlm_build/irstlm_build.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import tempfile
5
+
6
+
7
def get_name():
    """Return the identifier of this component."""
    component_name = 'irstlm_build'
    return component_name
9
+
10
def get_inputs():
    """Return the list of input names read by the processing function."""
    input_names = ['input_filename']
    return input_names
12
+
13
def get_outputs():
    """Return the list of output names produced by the processing function."""
    output_names = [
        'add_start_end_filename',
        'lm_filename',
        'compiled_lm_filename',
    ]
    return output_names
15
+
16
def get_configuration():
    """Return the list of configuration names that `configure` reads."""
    configuration_names = [
        'irstlm_installation_dir',
        'irstlm_smoothing_method',
        'language_model_directory',
    ]
    return configuration_names
18
+
19
def configure(args):
    """Map the external configuration names onto the keys `initialise` uses.

    Raises KeyError if `args` lacks one of the names listed by
    `get_configuration`.
    """
    return {
        'irstlm_install_directory': args['irstlm_installation_dir'],
        'smoothing_method': args['irstlm_smoothing_method'],
        'lm_directory': args['language_model_directory'],
    }
25
+
26
def initialise(config):
    """Create the processing function that builds an IRSTLM language model.

    `config` is the dictionary returned by `configure`; the keys read are
    'irstlm_install_directory', 'smoothing_method' and 'lm_directory'.

    The returned `process(a, s)` reads a['input_filename'] (`s` is not
    used), runs add-start-end.sh, build-lm.sh and compile-lm from the
    IRSTLM `bin` directory, and returns a dictionary with the keys
    'add_start_end_filename', 'lm_filename' and 'compiled_lm_filename'.

    Raises:
        Exception: the input file does not exist.
        IndexError: the input file name has fewer than three dot-separated
            parts, so the third part cannot be replaced.
        subprocess.CalledProcessError: one of the IRSTLM tools exited with
            a non-zero status.
    """
    def process(a, s):
        lm_directory = config['lm_directory']
        irstlm_bin = os.path.join(config['irstlm_install_directory'], "bin")

        # Create the LM directory if we need to
        if not os.path.exists(lm_directory):
            os.makedirs(lm_directory)

        # The filename of the file to chew through
        start_end_input_filename = a['input_filename']
        if not os.path.exists(start_end_input_filename):
            raise Exception("IRSTLM Build: Input file could not be found at [%s]" % start_end_input_filename)

        # The output names are derived by replacing the third dot-separated
        # part of the input base name, e.g. a.b.tok.en -> a.b.sb.en.
        filename_bits = os.path.basename(start_end_input_filename).split(".")
        if len(filename_bits) < 3:
            raise IndexError("IRSTLM Build: Input file name [%s] needs at least three dot-separated parts"
                             % start_end_input_filename)

        def derive_filename(marker):
            bits = list(filename_bits)
            bits[2] = marker
            return os.path.join(lm_directory, ".".join(bits))

        start_end_output_filename = derive_filename("sb")
        lm_filename = derive_filename("lm")
        compiled_lm_filename = derive_filename("arpa")

        # First thing to do is add start and end markers.  check_call raises
        # CalledProcessError on a non-zero exit status, so no separate
        # return-code test is needed; the files are closed even on failure.
        start_end_cmdline = [os.path.join(irstlm_bin, "add-start-end.sh")]
        print("IRSTLM Build: Invoking [%s]..." % " ".join(start_end_cmdline))
        with open(start_end_input_filename, 'r') as infile:
            with open(start_end_output_filename, 'w') as outfile:
                subprocess.check_call(start_end_cmdline, stdin=infile, stdout=outfile)

        # Next build the language model in a scratch directory that is
        # always removed afterwards
        tmp_dir = tempfile.mkdtemp(dir="/tmp")
        try:
            build_lm_cmdline = [os.path.join(irstlm_bin, "build-lm.sh"),
                                "-i", start_end_output_filename,
                                "-t", tmp_dir,
                                "-p",
                                "-s", config['smoothing_method'],
                                "-o", lm_filename]
            print("IRSTLM Build: Invoking [%s]..." % " ".join(build_lm_cmdline))
            subprocess.check_call(build_lm_cmdline)
        finally:
            if os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir)

        # Compile the LM.  The ".gz" suffix is appended to the name handed to
        # build-lm.sh (presumably because it writes gzipped output -- TODO
        # confirm against the IRSTLM documentation).
        lm_filename = lm_filename + ".gz"
        compile_lm_cmdline = [os.path.join(irstlm_bin, "compile-lm"),
                              "--text", "yes",
                              lm_filename,
                              compiled_lm_filename]
        print("IRSTLM Build: Invoking [%s]..." % " ".join(compile_lm_cmdline))
        subprocess.check_call(compile_lm_cmdline)

        output = {'add_start_end_filename': start_end_output_filename,
                  'lm_filename': lm_filename,
                  'compiled_lm_filename': compiled_lm_filename}

        print("IRSTLM Build: Output = %s" % output)

        return output

    return process
97
+
98
+
99
if __name__ == '__main__':
    # Manual smoke test: builds a language model from a tokenised corpus and
    # compares the returned file names with the expected ones.
    from pypeline.helpers.helpers import eval_pipeline, cons_function_component

    lm_dir = os.environ["PWD"]
    # configure() reads 'irstlm_installation_dir', so that is the key that
    # must carry the IRSTLM installation root.
    configuration = {'irstlm_installation_dir': os.environ["IRSTLM"],
                     'irstlm_smoothing_method': 'improved-kneser-ney',
                     'language_model_directory': lm_dir}
    component_config = configure(configuration)
    component = initialise(component_config)

    value = eval_pipeline(cons_function_component(component),
                          {'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'},
                          component_config)
    target = {'add_start_end_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.sb.en'),
              'lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.lm.en.gz'),
              'compiled_lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.arpa.en')}
    print("Target: %s" % target)
    if value != target:
        raise Exception("Massive fail!")
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/mert/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/mert/mert.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+
5
def get_name():
    """Return the identifier of this component."""
    component_name = 'mert'
    return component_name
7
+
8
def get_inputs():
    """Return the list of input names read by the processing function."""
    return [
        'evaluation_data_filename',
        'trg_language_model_filename',
        'trg_language_model_order',
        'trg_language_model_type',
        'moses_ini_filename',
    ]
12
+
13
def get_outputs():
    """Return the list of output names produced by the processing function."""
    output_names = ['moses_ini_filename']
    return output_names
15
+
16
def get_configuration():
    """Return the list of configuration names that `configure` reads."""
    return [
        'source_language',
        'target_language',
        'moses_installation_dir',
        'mert_working_directory',
        'mert_max_no_iterations',
    ]
20
+
21
def configure(args):
    """Map the external configuration names onto the keys `initialise` uses.

    Raises KeyError if `args` lacks one of the names listed by
    `get_configuration`.
    """
    # (internal key, external configuration name), in lookup order
    key_pairs = (
        ('src_lang', 'source_language'),
        ('trg_lang', 'target_language'),
        ('moses_installation_dir', 'moses_installation_dir'),
        ('mert_working_dir', 'mert_working_directory'),
        ('max_no_iterations', 'mert_max_no_iterations'),
    )
    return dict((internal, args[external]) for internal, external in key_pairs)
29
+
30
def initialise(config):
    """Create the processing function that runs MERT tuning.

    `config` is the dictionary returned by `configure`; the keys read are
    'src_lang', 'trg_lang', 'moses_installation_dir', 'mert_working_dir'
    and 'max_no_iterations'.

    The returned `process(a, s)` reads the values named by `get_inputs`
    from `a` (`s` is not used), recreates the working directory, writes a
    copy of the input moses.ini whose [lmodel-file] section points at the
    given language model, runs mert-moses.pl, and returns
    {'moses_ini_filename': <working dir>/moses.ini}.

    Raises:
        Exception: the input moses.ini does not exist, the moses.ini
            rewrite command fails, or no moses.ini exists in the working
            directory after the MERT run.
    """
    def process(a, s):
        # Drop the final extension of the evaluation data file; the source
        # and target language codes are appended below.  splitext only looks
        # at the last path component, so dots in directory names are safe.
        infilename = os.path.splitext(os.path.abspath(a['evaluation_data_filename']))[0]
        lm_file = os.path.abspath(a['trg_language_model_filename'])
        lm_order = int(a['trg_language_model_order'])
        lm_type = int(a['trg_language_model_type'])
        max_no_iters = int(config['max_no_iterations'])
        orig_moses_ini = os.path.abspath(a['moses_ini_filename'])

        if not os.path.exists(orig_moses_ini):
            raise Exception("Error: Input moses.ini does not exist")

        workdir = os.path.abspath(config['mert_working_dir'])
        #simply call the training perl script
        #remove the workdir if it is already there
        if os.path.exists(workdir):
            shutil.rmtree(workdir)
        os.makedirs(workdir)

        moses_install_dir = os.path.abspath(config['moses_installation_dir'])
        bin_dir = os.path.join(moses_install_dir, 'bin')
        # Explicit substitution table for the two shell command templates
        fields = {
            'orig_moses_ini': orig_moses_ini,
            'lm_type': lm_type,
            'lm_order': lm_order,
            'lm_file': lm_file,
            'moses_ini': os.path.join(workdir, 'trained-moses.ini'),
            'mert_perl': os.path.join(moses_install_dir, 'scripts', 'training', 'mert-moses.pl'),
            'max_no_iters': max_no_iters,
            'bin_dir': bin_dir,
            'workdir': workdir,
            'src_file': infilename + '.' + config['src_lang'],
            'ref_file': infilename + '.' + config['trg_lang'],
            'moses_bin': os.path.join(bin_dir, 'moses'),
            'logfile': os.path.join(workdir, 'log'),
        }

        #change lm configuration in moses ini
        cmd = r"cat %(orig_moses_ini)s | sed '/\[lmodel-file\]/,/^[[:space:]]*$/c\[lmodel-file\]\n%(lm_type)s 0 %(lm_order)s %(lm_file)s\n' > %(moses_ini)s"
        if os.system(cmd % fields) != 0:
            raise Exception("Error: Could not rewrite the [lmodel-file] section of moses.ini")

        #the command
        cmd = '%(mert_perl)s --maximum-iterations %(max_no_iters)d --mertdir %(bin_dir)s --working-dir %(workdir)s %(src_file)s %(ref_file)s %(moses_bin)s %(moses_ini)s 2> %(logfile)s'
        pipe = subprocess.Popen(cmd % fields, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        # communicate() drains stdout while waiting; wait() alone can block
        # forever once the child has filled the pipe buffer.
        pipe.communicate()

        #check the moses ini
        new_mosesini = os.path.join(workdir, 'moses.ini')
        if not os.path.exists(new_mosesini):
            raise Exception('Failed MERT')

        return {'moses_ini_filename': new_mosesini}

    return process
79
+
80
+
81
if __name__ == '__main__':
    def __test():
        # Manual smoke test.  The keys below are the ones configure() and
        # process() actually read.
        configuration = {'source_language': 'en',
                         'target_language': 'lt',
                         'moses_installation_dir': os.path.abspath('../../../../'),
                         'mert_working_directory': '../../../../../tuning',
                         'mert_max_no_iterations': 10}
        values = {'evaluation_data_filename': '../../../../../corpus/tune',
                  'moses_ini_filename': '../../../../../model/model/moses.ini',
                  'trg_language_model_filename': '../../../../../corpus/train.lt.lm',
                  'trg_language_model_type': 9,
                  'trg_language_model_order': 4}
        from pypeline.helpers.helpers import run_pipeline
        box_config = configure(configuration)
        # initialise() expects the translated configuration, not the raw one
        box = initialise(box_config)
        print(run_pipeline(box, values, None))

    #do some test
    __test()
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/model_training/__init__.py ADDED
File without changes
mosesdecoder/contrib/arrow-pipelines/pcl/components/wrappers/model_training/model_training.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+
5
+
6
def get_name():
    """Return the identifier of this component."""
    component_name = 'model_training'
    return component_name
8
+
9
def get_inputs():
    """Return the list of input names read by the processing function."""
    input_names = ['src_filename', 'trg_filename']
    return input_names
11
+
12
def get_outputs():
    """Return the list of output names produced by the processing function."""
    output_names = ['moses_ini_filename']
    return output_names
14
+
15
def get_configuration():
    """Return the list of configuration names that `configure` reads."""
    return [
        'source_language',
        'target_language',
        'moses_installation_dir',
        'giza_installation_dir',
        'translation_model_directory',
        'alignment_method',
        'reordering_method',
    ]
20
+
21
+ # Alignment = grow-diag-final-and
22
+ # Reordering = msd-bidirectional-fe
23
def configure(args):
    """Map the external configuration names onto the keys `initialise` uses.

    Raises KeyError if `args` lacks one of the names listed by
    `get_configuration`.
    """
    # (internal key, external configuration name), in lookup order
    key_pairs = (
        ('src_lang', 'source_language'),
        ('trg_lang', 'target_language'),
        ('moses_installation_dir', 'moses_installation_dir'),
        ('external_bin_dir', 'giza_installation_dir'),
        ('model_directory', 'translation_model_directory'),
        ('alignment', 'alignment_method'),
        ('reordering', 'reordering_method'),
    )
    return dict((internal, args[external]) for internal, external in key_pairs)
33
+
34
def initialise(config):
    """Create the processing function that trains the translation model.

    `config` is the dictionary returned by `configure`; the keys read are
    'src_lang', 'trg_lang', 'moses_installation_dir', 'external_bin_dir',
    'model_directory', 'alignment' and 'reordering'.

    The returned `process(a, s)` reads a['src_filename'] and
    a['trg_filename'] (`s` is not used), recreates the model directory,
    runs train-model.perl with a placeholder language-model file, and
    returns {'moses_ini_filename': <model dir>/model/moses.ini}.

    Raises:
        Exception: the source and target file names differ in anything
            other than their final extension, or no moses.ini exists after
            the training run.
    """
    def process(a, s):
        def corpus_name(filename):
            # Base name with its final extension removed
            return ".".join(os.path.basename(filename).split('.')[:-1])

        src_filename = os.path.abspath(a['src_filename'])
        trg_filename = os.path.abspath(a['trg_filename'])
        src_corpora_name = corpus_name(src_filename)
        trg_corpora_name = corpus_name(trg_filename)
        if src_corpora_name != trg_corpora_name:
            raise Exception("Mismatch of source [%s] and target [%s] filename" % (src_filename, trg_filename))

        infilename = os.path.abspath(os.path.join(os.path.dirname(src_filename), src_corpora_name))
        workdir = os.path.abspath(config['model_directory'])
        #simply call the training perl script
        #remove the workdir if it is already there
        if os.path.exists(workdir):
            shutil.rmtree(workdir)
        os.makedirs(workdir)

        #create a dummy lm file
        dummy_lmfile = os.path.join(workdir, 'dummy.lm')
        with open(dummy_lmfile, 'w') as f:
            f.write("dummy lm file\n")

        # Explicit substitution table for the shell command template
        fields = {
            'train_model_perl': os.path.abspath(os.path.join(config['moses_installation_dir'],
                                                             'scripts',
                                                             'training',
                                                             'train-model.perl')),
            'workdir': workdir,
            'infilename': infilename,
            'src_lang': config['src_lang'].lower(),
            'trg_lang': config['trg_lang'].lower(),
            'alignment_method': config['alignment'],
            'reordering_method': config['reordering'],
            'dummy_lmfile': dummy_lmfile,
            'external_bin': os.path.abspath(config['external_bin_dir']),
            'logfile': os.path.join(workdir, 'log'),
        }

        #the command
        cmd = '%(train_model_perl)s -root-dir %(workdir)s -corpus %(infilename)s ' \
              '-f %(src_lang)s -e %(trg_lang)s -alignment %(alignment_method)s ' \
              '-reordering %(reordering_method)s -lm 0:5:%(dummy_lmfile)s:0 ' \
              '-external-bin-dir %(external_bin)s 2> %(logfile)s'

        pipe = subprocess.Popen(cmd % fields, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        # communicate() drains stdout while waiting; wait() alone can block
        # forever once the child has filled the pipe buffer.
        pipe.communicate()

        # check the moses ini
        mosesini = os.path.join(workdir, 'model', 'moses.ini')
        if not os.path.exists(mosesini):
            raise Exception('Failed training model')

        return {'moses_ini_filename': mosesini}

    return process
87
+
88
+
89
if __name__ == '__main__':
    def __test():
        # Manual smoke test.  The keys below are the ones configure() and
        # process() actually read; the alignment and reordering values are
        # the ones named in the comment above configure().
        corpus = '/Users/ianjohnson/work/MTM-2012/corpus/training/cleantrain'
        configuration = {'source_language': 'en',
                         'target_language': 'lt',
                         'moses_installation_dir': os.environ['MOSES_HOME'],
                         'giza_installation_dir': os.environ['GIZA_HOME'],
                         'translation_model_directory': 'model-dir',
                         'alignment_method': 'grow-diag-final-and',
                         'reordering_method': 'msd-bidirectional-fe'}
        values = {'src_filename': corpus + '.en',
                  'trg_filename': corpus + '.lt'}
        from pypeline.helpers.helpers import run_pipeline
        box_config = configure(configuration)
        box = initialise(box_config)
        print(run_pipeline(box, values, None))

    #do some test
    __test()
mosesdecoder/contrib/arrow-pipelines/pcl/training_pipeline.cfg ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Configuration]
2
+ source_language = en
3
+ target_language = lt
4
+ max_segment_length = 20
5
+ corpus_development_size = 1000
6
+ corpus_evaluation_size = 500
7
+ alignment_method = grow-diag-final-and
8
+ reordering_method = msd-bidirectional-fe
9
+ smoothing_method = improved-kneser-ney
10
+ tokenisation_directory = training/tokenisation
11
+ translation_model_directory = training/model
12
+ language_model_directory = training/lm
13
+ mert_directory = training/mert
14
+ mert_max_no_iterations = 10
15
+ moses_installation_directory = $(MOSES_HOME)
16
+ giza_installation_directory = $(GIZA_HOME)
17
+ irstlm_installation_directory = $(IRSTLM)
18
+
19
+ [Inputs]
20
+ src_filename = ../test_data/cleantrain.en
21
+ trg_filename = ../test_data/cleantrain.lt
mosesdecoder/contrib/arrow-pipelines/pcl/training_pipeline.pcl ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Import all of the components to be composed
3
+ #
4
+ import components.src_trg_tokeniser as tokeniser
5
+ import components.translation_model_training as model_training
6
+ import components.wrappers.irstlm_build.irstlm_build as lang_model
7
+ import components.wrappers.mert.mert as mert
8
+
9
+ #
10
+ # Component definition
11
+ #
12
+ # Config: {model_training.max_segment_length,
13
+ # model_training.corpus.[development_size|evaluation_size],
14
+ # model_training.[src|trg].language,
15
+ # model_training.method.[alignment|reordering], {moses_ini_filename,
16
+ # model_training.giza.installation, evaluation_data_filename}
17
+ # {src_filename, {tokenised_src_filename, model_training.translation_model.dir} |
18
+ # trg_filename} tokenised_trg_filename} +-----------------------------------------+ +-------+ | {moses_ini_filename}
19
+ # | +-------+ +-------+ +-------+ | +-------+ | tokenised_src_filename -> src_filename, | | Model | V +-------+ |
20
+ # V | +--->+ Src/ +--->+ | V | +-->+ tokenised_trg_filename -> trg_filename +-->+ Train +------>+ | +------+ V
21
+ # --->+ Split | | Trg | | Merge +--->+ Split | +-----------------------------------------+ +-------+ | Merge +----->+ MERT +--->
22
+ # | +--->+ Token +--->+ | | +--\ +------------------------------------------+ +--------+ | | ^ +------+
23
+ # +-------+ +-------+ +-------+ +-------+ \->+ tokenised_trg_filename -> input_filename +-->+ IRSTLM +-->+ | |
24
+ # Config: {tokeniser.[src|trg].language, +------------------------------------------+ +--------+ ^ +-------+ |
25
+ # tokeniser.[src|trg].tokenisation_dir Config: {irstlm_installation_dir::String, | |
26
+ # tokeniser.moses.installation} irstlm_smoothing_method::String, | |
27
+ # language_model_directory} | |
28
+ # | |
29
+ # {lm_filename, compiled_lm_filename, add_start_end_filename} |
30
+ # |
31
+ # {moses_ini_filename, evaluation_data_filename, trg_language_model_filename,
32
+ # trg_language_model_order, trg_language_model_type}
33
+ #
34
+ component training_pipeline
35
+ inputs src_filename, trg_filename
36
+ output moses_ini_filename
37
+ configuration source_language,
38
+ target_language,
39
+ max_segment_length,
40
+ corpus_development_size,
41
+ corpus_evaluation_size,
42
+ alignment_method,
43
+ reordering_method,
44
+ smoothing_method,
45
+ tokenisation_directory,
46
+ translation_model_directory,
47
+ language_model_directory,
48
+ mert_directory,
49
+ mert_max_no_iterations,
50
+ moses_installation_directory,
51
+ giza_installation_directory,
52
+ irstlm_installation_directory
53
+ declare
54
+ tokeniser := new tokeniser with
55
+ source_language -> tokeniser.src.language,
56
+ target_language -> tokeniser.trg.language,
57
+ tokenisation_directory -> tokeniser.src.tokenisation_dir,
58
+ tokenisation_directory -> tokeniser.trg.tokenisation_dir,
59
+ moses_installation_directory -> tokeniser.moses.installation
60
+ model_training := new model_training with
61
+ max_segment_length -> model_training.max_segment_length,
62
+ corpus_development_size -> model_training.corpus.development_size,
63
+ corpus_evaluation_size -> model_training.corpus.evaluation_size,
64
+ translation_model_directory -> model_training.translation_model.dir,
65
+ alignment_method -> model_training.method.alignment,
66
+ reordering_method -> model_training.method.reordering,
67
+ source_language -> model_training.src.language,
68
+ moses_installation_directory -> model_training.moses.installation,
69
+ giza_installation_directory -> model_training.giza.installation,
70
+ target_language -> model_training.trg.language
71
+ irstlm := new lang_model with
72
+ irstlm_installation_directory -> irstlm_installation_dir,
73
+ smoothing_method -> irstlm_smoothing_method,
74
+ language_model_directory -> language_model_directory
75
+ mert := new mert with
76
+ source_language -> source_language,
77
+ target_language -> target_language,
78
+ moses_installation_directory -> moses_installation_dir,
79
+ mert_directory -> mert_working_directory,
80
+ mert_max_no_iterations -> mert_max_no_iterations
81
+ as
82
+ # Split and transform the input to the tokeniser component
83
+ # Inputs: src_filename, trg_filename
84
+ # Outputs: (tokenised_src_filename), (tokenised_trg_filename)
85
+ (wire src_filename -> src_filename,
86
+ trg_filename -> _ &&&
87
+ wire trg_filename -> trg_filename,
88
+ src_filename -> _) >>>
89
+ tokeniser >>>
90
+
91
+ # Merge output from tokeniser
92
+ # Inputs: (tokenised_src_filename), (tokenised_trg_filename)
93
+ # Outputs: tokenised_src_filename, tokenised_trg_filename
94
+ merge top[tokenised_src_filename] -> tokenised_src_filename,
95
+ bottom[tokenised_trg_filename] -> tokenised_trg_filename >>>
96
+
97
+ # Train the translation table and target language model
98
+ # Inputs: tokenised_src_filename, tokenised_trg_filename
99
+ # Outputs: (moses_ini_filename), ('add_start_end_filename', 'lm_filename', 'compiled_lm_filename')
100
+ ((wire tokenised_src_filename -> src_filename,
101
+ tokenised_trg_filename -> trg_filename >>> model_training) &&&
102
+ (wire tokenised_trg_filename -> input_filename,
103
+ tokenised_src_filename -> _ >>> irstlm)) >>>
104
+
105
+ # Merge the output from the TT and LM training component
106
+ # Inputs: (moses_ini_filename, evaluation_data_filename),
107
+ # (compiled_lm_filename, add_start_end_filename, lm_filename)
108
+ # Outputs: moses_ini_filename, evaluation_data_filename,
109
+ # trg_language_model_filename, trg_language_model_order, trg_language_model_type
110
+ merge top[moses_ini_filename] -> moses_ini_filename,
111
+ top[evaluation_data_filename] -> evaluation_data_filename,
112
+ bottom[compiled_lm_filename] -> trg_language_model_filename,
113
+ bottom[add_start_end_filename] -> _,
114
+ bottom[lm_filename] -> _,
115
+ 3 -> trg_language_model_order,
116
+ 9 -> trg_language_model_type >>>
117
+ mert
mosesdecoder/contrib/checkplf/Makefile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ checkplf: checkplf.cpp
2
+ g++ checkplf.cpp -I../../moses/ ../../lib/libmoses.a -o checkplf
mosesdecoder/contrib/lmserver/Makefile.in ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Makefile.in generated by automake 1.9.2 from Makefile.am.
2
+ # @configure_input@
3
+
4
+ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
5
+ # 2003, 2004 Free Software Foundation, Inc.
6
+ # This Makefile.in is free software; the Free Software Foundation
7
+ # gives unlimited permission to copy and/or distribute it,
8
+ # with or without modifications, as long as this notice is preserved.
9
+
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY, to the extent permitted by law; without
12
+ # even the implied warranty of MERCHANTABILITY or FITNESS FOR A
13
+ # PARTICULAR PURPOSE.
14
+
15
+ @SET_MAKE@
16
+
17
+ SOURCES = $(lmserver_SOURCES) $(lmserver_debug_SOURCES)
18
+
19
+ srcdir = @srcdir@
20
+ top_srcdir = @top_srcdir@
21
+ VPATH = @srcdir@
22
+ pkgdatadir = $(datadir)/@PACKAGE@
23
+ pkglibdir = $(libdir)/@PACKAGE@
24
+ pkgincludedir = $(includedir)/@PACKAGE@
25
+ top_builddir = .
26
+ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
27
+ INSTALL = @INSTALL@
28
+ install_sh_DATA = $(install_sh) -c -m 644
29
+ install_sh_PROGRAM = $(install_sh) -c
30
+ install_sh_SCRIPT = $(install_sh) -c
31
+ INSTALL_HEADER = $(INSTALL_DATA)
32
+ transform = $(program_transform_name)
33
+ NORMAL_INSTALL = :
34
+ PRE_INSTALL = :
35
+ POST_INSTALL = :
36
+ NORMAL_UNINSTALL = :
37
+ PRE_UNINSTALL = :
38
+ POST_UNINSTALL = :
39
+ build_triplet = @build@
40
+ host_triplet = @host@
41
+ target_triplet = @target@
42
+ bin_PROGRAMS = lmserver$(EXEEXT) lmserver-debug$(EXEEXT)
43
+ DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
44
+ $(srcdir)/Makefile.in $(srcdir)/config.h.in \
45
+ $(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \
46
+ compile config.guess config.sub depcomp install-sh missing
47
+ subdir = .
48
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
49
+ am__aclocal_m4_deps = $(top_srcdir)/configure.ac
50
+ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
51
+ $(ACLOCAL_M4)
52
+ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
53
+ configure.lineno configure.status.lineno
54
+ mkinstalldirs = $(install_sh) -d
55
+ CONFIG_HEADER = config.h
56
+ CONFIG_CLEAN_FILES =
57
+ am__installdirs = "$(DESTDIR)$(bindir)"
58
+ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
59
+ PROGRAMS = $(bin_PROGRAMS)
60
+ am_lmserver_OBJECTS = lmserver-lmserver.$(OBJEXT) \
61
+ lmserver-thread.$(OBJEXT) lmserver-srilm.$(OBJEXT)
62
+ lmserver_OBJECTS = $(am_lmserver_OBJECTS)
63
+ am__objects_1 = lmserver.$(OBJEXT) thread.$(OBJEXT) srilm.$(OBJEXT)
64
+ am_lmserver_debug_OBJECTS = $(am__objects_1)
65
+ lmserver_debug_OBJECTS = $(am_lmserver_debug_OBJECTS)
66
+ DEFAULT_INCLUDES = -I. -I$(srcdir) -I.
67
+ depcomp = $(SHELL) $(top_srcdir)/depcomp
68
+ am__depfiles_maybe = depfiles
69
+ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
70
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
71
+ CCLD = $(CC)
72
+ LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
73
+ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
74
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
75
+ CXXLD = $(CXX)
76
+ CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
77
+ -o $@
78
+ SOURCES = $(lmserver_SOURCES) $(lmserver_debug_SOURCES)
79
+ DIST_SOURCES = $(lmserver_SOURCES) $(lmserver_debug_SOURCES)
80
+ ETAGS = etags
81
+ CTAGS = ctags
82
+ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
83
+ distdir = $(PACKAGE)-$(VERSION)
84
+ top_distdir = $(distdir)
85
+ am__remove_distdir = \
86
+ { test ! -d $(distdir) \
87
+ || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \
88
+ && rm -fr $(distdir); }; }
89
+ DIST_ARCHIVES = $(distdir).tar.gz
90
+ GZIP_ENV = --best
91
+ distuninstallcheck_listfiles = find . -type f -print
92
+ distcleancheck_listfiles = find . -type f -print
93
+ ACLOCAL = @ACLOCAL@
94
+ AMDEP_FALSE = @AMDEP_FALSE@
95
+ AMDEP_TRUE = @AMDEP_TRUE@
96
+ AMTAR = @AMTAR@
97
+ AUTOCONF = @AUTOCONF@
98
+ AUTOHEADER = @AUTOHEADER@
99
+ AUTOMAKE = @AUTOMAKE@
100
+ AWK = @AWK@
101
+ CC = @CC@
102
+ CCDEPMODE = @CCDEPMODE@
103
+ CFLAGS = @CFLAGS@
104
+ CPP = @CPP@
105
+ CPPFLAGS = @CPPFLAGS@
106
+ CXX = @CXX@
107
+ CXXDEPMODE = @CXXDEPMODE@
108
+ CXXFLAGS = @CXXFLAGS@
109
+ CYGPATH_W = @CYGPATH_W@
110
+ DAEMON_OBJ = @DAEMON_OBJ@
111
+ DEFS = @DEFS@
112
+ DEPDIR = @DEPDIR@
113
+ ECHO_C = @ECHO_C@
114
+ ECHO_N = @ECHO_N@
115
+ ECHO_T = @ECHO_T@
116
+ EGREP = @EGREP@
117
+ EXEEXT = @EXEEXT@
118
+ INSTALL_DATA = @INSTALL_DATA@
119
+ INSTALL_PROGRAM = @INSTALL_PROGRAM@
120
+ INSTALL_SCRIPT = @INSTALL_SCRIPT@
121
+ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
122
+ LDFLAGS = @LDFLAGS@
123
+ LIBOBJS = @LIBOBJS@
124
+ LIBS = @LIBS@
125
+ LTLIBOBJS = @LTLIBOBJS@
126
+ MAKEINFO = @MAKEINFO@
127
+ OBJEXT = @OBJEXT@
128
+ PACKAGE = @PACKAGE@
129
+ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
130
+ PACKAGE_NAME = @PACKAGE_NAME@
131
+ PACKAGE_STRING = @PACKAGE_STRING@
132
+ PACKAGE_TARNAME = @PACKAGE_TARNAME@
133
+ PACKAGE_VERSION = @PACKAGE_VERSION@
134
+ PATH_SEPARATOR = @PATH_SEPARATOR@
135
+ SET_MAKE = @SET_MAKE@
136
+ SHELL = @SHELL@
137
+ SRI_LM_FALSE = @SRI_LM_FALSE@
138
+ SRI_LM_TRUE = @SRI_LM_TRUE@
139
+ STRIP = @STRIP@
140
+ VERSION = @VERSION@
141
+ ac_ct_CC = @ac_ct_CC@
142
+ ac_ct_CXX = @ac_ct_CXX@
143
+ ac_ct_STRIP = @ac_ct_STRIP@
144
+ am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
145
+ am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
146
+ am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
147
+ am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
148
+ am__include = @am__include@
149
+ am__leading_dot = @am__leading_dot@
150
+ am__quote = @am__quote@
151
+ am__tar = @am__tar@
152
+ am__untar = @am__untar@
153
+ bindir = @bindir@
154
+ build = @build@
155
+ build_alias = @build_alias@
156
+ build_cpu = @build_cpu@
157
+ build_os = @build_os@
158
+ build_vendor = @build_vendor@
159
+ datadir = @datadir@
160
+ exec_prefix = @exec_prefix@
161
+ host = @host@
162
+ host_alias = @host_alias@
163
+ host_cpu = @host_cpu@
164
+ host_os = @host_os@
165
+ host_vendor = @host_vendor@
166
+ includedir = @includedir@
167
+ infodir = @infodir@
168
+ install_sh = @install_sh@
169
+ libdir = @libdir@
170
+ libexecdir = @libexecdir@
171
+ localstatedir = @localstatedir@
172
+ mandir = @mandir@
173
+ mkdir_p = @mkdir_p@
174
+ oldincludedir = @oldincludedir@
175
+ prefix = @prefix@
176
+ program_transform_name = @program_transform_name@
177
+ sbindir = @sbindir@
178
+ sharedstatedir = @sharedstatedir@
179
+ sysconfdir = @sysconfdir@
180
+ target = @target@
181
+ target_alias = @target_alias@
182
+ target_cpu = @target_cpu@
183
+ target_os = @target_os@
184
+ target_vendor = @target_vendor@
185
+ lmserver_SOURCES = lmserver.c lmserver.h thread.c srilm.cc
186
+ lmserver_debug_SOURCES = $(lmserver_SOURCES)
187
+ lmserver_CPPFLAGS = -DNDEBUG
188
+ lmserver_LDADD = @DAEMON_OBJ@
189
+ lmserver_debug_LDADD = @DAEMON_OBJ@
190
+ lmserver_DEPENDENCIES = @DAEMON_OBJ@
191
+ lmserver_debug_DEPENDENCIES = @DAEMON_OBJ@
192
+ DIST_DIRS = examples
193
+ EXTRA_DIST = examples daemon.c
194
+ all: config.h
195
+ $(MAKE) $(AM_MAKEFLAGS) all-am
196
+
197
+ .SUFFIXES:
198
+ .SUFFIXES: .c .cc .o .obj
199
+ am--refresh:
200
+ @:
201
+ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
202
+ @for dep in $?; do \
203
+ case '$(am__configure_deps)' in \
204
+ *$$dep*) \
205
+ echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
206
+ cd $(srcdir) && $(AUTOMAKE) --gnu \
207
+ && exit 0; \
208
+ exit 1;; \
209
+ esac; \
210
+ done; \
211
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
212
+ cd $(top_srcdir) && \
213
+ $(AUTOMAKE) --gnu Makefile
214
+ .PRECIOUS: Makefile
215
+ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
216
+ @case '$?' in \
217
+ *config.status*) \
218
+ echo ' $(SHELL) ./config.status'; \
219
+ $(SHELL) ./config.status;; \
220
+ *) \
221
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
222
+ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
223
+ esac;
224
+
225
+ $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
226
+ $(SHELL) ./config.status --recheck
227
+
228
+ $(top_srcdir)/configure: $(am__configure_deps)
229
+ cd $(srcdir) && $(AUTOCONF)
230
+ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
231
+ cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
232
+
233
+ config.h: stamp-h1
234
+ @if test ! -f $@; then \
235
+ rm -f stamp-h1; \
236
+ $(MAKE) stamp-h1; \
237
+ else :; fi
238
+
239
+ stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
240
+ @rm -f stamp-h1
241
+ cd $(top_builddir) && $(SHELL) ./config.status config.h
242
+ $(srcdir)/config.h.in: $(am__configure_deps)
243
+ cd $(top_srcdir) && $(AUTOHEADER)
244
+ rm -f stamp-h1
245
+ touch $@
246
+
247
+ distclean-hdr:
248
+ -rm -f config.h stamp-h1
249
+ install-binPROGRAMS: $(bin_PROGRAMS)
250
+ @$(NORMAL_INSTALL)
251
+ test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)"
252
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
253
+ p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
254
+ if test -f $$p \
255
+ ; then \
256
+ f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \
257
+ echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \
258
+ $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \
259
+ else :; fi; \
260
+ done
261
+
262
+ uninstall-binPROGRAMS:
263
+ @$(NORMAL_UNINSTALL)
264
+ @list='$(bin_PROGRAMS)'; for p in $$list; do \
265
+ f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \
266
+ echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \
267
+ rm -f "$(DESTDIR)$(bindir)/$$f"; \
268
+ done
269
+
270
+ clean-binPROGRAMS:
271
+ -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
272
+ lmserver$(EXEEXT): $(lmserver_OBJECTS) $(lmserver_DEPENDENCIES)
273
+ @rm -f lmserver$(EXEEXT)
274
+ $(CXXLINK) $(lmserver_LDFLAGS) $(lmserver_OBJECTS) $(lmserver_LDADD) $(LIBS)
275
+ lmserver-debug$(EXEEXT): $(lmserver_debug_OBJECTS) $(lmserver_debug_DEPENDENCIES)
276
+ @rm -f lmserver-debug$(EXEEXT)
277
+ $(CXXLINK) $(lmserver_debug_LDFLAGS) $(lmserver_debug_OBJECTS) $(lmserver_debug_LDADD) $(LIBS)
278
+
279
+ mostlyclean-compile:
280
+ -rm -f *.$(OBJEXT)
281
+
282
+ distclean-compile:
283
+ -rm -f *.tab.c
284
+
285
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmserver-lmserver.Po@am__quote@
286
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmserver-srilm.Po@am__quote@
287
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmserver-thread.Po@am__quote@
288
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmserver.Po@am__quote@
289
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srilm.Po@am__quote@
290
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread.Po@am__quote@
291
+
292
+ .c.o:
293
+ @am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
294
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
295
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
296
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
297
+ @am__fastdepCC_FALSE@ $(COMPILE) -c $<
298
+
299
+ .c.obj:
300
+ @am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
301
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
302
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
303
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
304
+ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
305
+
306
+ lmserver-lmserver.o: lmserver.c
307
+ @am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lmserver-lmserver.o -MD -MP -MF "$(DEPDIR)/lmserver-lmserver.Tpo" -c -o lmserver-lmserver.o `test -f 'lmserver.c' || echo '$(srcdir)/'`lmserver.c; \
308
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lmserver-lmserver.Tpo" "$(DEPDIR)/lmserver-lmserver.Po"; else rm -f "$(DEPDIR)/lmserver-lmserver.Tpo"; exit 1; fi
309
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='lmserver.c' object='lmserver-lmserver.o' libtool=no @AMDEPBACKSLASH@
310
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
311
+ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lmserver-lmserver.o `test -f 'lmserver.c' || echo '$(srcdir)/'`lmserver.c
312
+
313
+ lmserver-lmserver.obj: lmserver.c
314
+ @am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lmserver-lmserver.obj -MD -MP -MF "$(DEPDIR)/lmserver-lmserver.Tpo" -c -o lmserver-lmserver.obj `if test -f 'lmserver.c'; then $(CYGPATH_W) 'lmserver.c'; else $(CYGPATH_W) '$(srcdir)/lmserver.c'; fi`; \
315
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lmserver-lmserver.Tpo" "$(DEPDIR)/lmserver-lmserver.Po"; else rm -f "$(DEPDIR)/lmserver-lmserver.Tpo"; exit 1; fi
316
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='lmserver.c' object='lmserver-lmserver.obj' libtool=no @AMDEPBACKSLASH@
317
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
318
+ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lmserver-lmserver.obj `if test -f 'lmserver.c'; then $(CYGPATH_W) 'lmserver.c'; else $(CYGPATH_W) '$(srcdir)/lmserver.c'; fi`
319
+
320
+ lmserver-thread.o: thread.c
321
+ @am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lmserver-thread.o -MD -MP -MF "$(DEPDIR)/lmserver-thread.Tpo" -c -o lmserver-thread.o `test -f 'thread.c' || echo '$(srcdir)/'`thread.c; \
322
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lmserver-thread.Tpo" "$(DEPDIR)/lmserver-thread.Po"; else rm -f "$(DEPDIR)/lmserver-thread.Tpo"; exit 1; fi
323
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='thread.c' object='lmserver-thread.o' libtool=no @AMDEPBACKSLASH@
324
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
325
+ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lmserver-thread.o `test -f 'thread.c' || echo '$(srcdir)/'`thread.c
326
+
327
+ lmserver-thread.obj: thread.c
328
+ @am__fastdepCC_TRUE@ if $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT lmserver-thread.obj -MD -MP -MF "$(DEPDIR)/lmserver-thread.Tpo" -c -o lmserver-thread.obj `if test -f 'thread.c'; then $(CYGPATH_W) 'thread.c'; else $(CYGPATH_W) '$(srcdir)/thread.c'; fi`; \
329
+ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/lmserver-thread.Tpo" "$(DEPDIR)/lmserver-thread.Po"; else rm -f "$(DEPDIR)/lmserver-thread.Tpo"; exit 1; fi
330
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='thread.c' object='lmserver-thread.obj' libtool=no @AMDEPBACKSLASH@
331
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
332
+ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o lmserver-thread.obj `if test -f 'thread.c'; then $(CYGPATH_W) 'thread.c'; else $(CYGPATH_W) '$(srcdir)/thread.c'; fi`
333
+
334
+ .cc.o:
335
+ @am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
336
+ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
337
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
338
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
339
+ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
340
+
341
+ .cc.obj:
342
+ @am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
343
+ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
344
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
345
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
346
+ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
347
+
348
+ lmserver-srilm.o: srilm.cc
349
+ @am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT lmserver-srilm.o -MD -MP -MF "$(DEPDIR)/lmserver-srilm.Tpo" -c -o lmserver-srilm.o `test -f 'srilm.cc' || echo '$(srcdir)/'`srilm.cc; \
350
+ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/lmserver-srilm.Tpo" "$(DEPDIR)/lmserver-srilm.Po"; else rm -f "$(DEPDIR)/lmserver-srilm.Tpo"; exit 1; fi
351
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='srilm.cc' object='lmserver-srilm.o' libtool=no @AMDEPBACKSLASH@
352
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
353
+ @am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o lmserver-srilm.o `test -f 'srilm.cc' || echo '$(srcdir)/'`srilm.cc
354
+
355
+ lmserver-srilm.obj: srilm.cc
356
+ @am__fastdepCXX_TRUE@ if $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT lmserver-srilm.obj -MD -MP -MF "$(DEPDIR)/lmserver-srilm.Tpo" -c -o lmserver-srilm.obj `if test -f 'srilm.cc'; then $(CYGPATH_W) 'srilm.cc'; else $(CYGPATH_W) '$(srcdir)/srilm.cc'; fi`; \
357
+ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/lmserver-srilm.Tpo" "$(DEPDIR)/lmserver-srilm.Po"; else rm -f "$(DEPDIR)/lmserver-srilm.Tpo"; exit 1; fi
358
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='srilm.cc' object='lmserver-srilm.obj' libtool=no @AMDEPBACKSLASH@
359
+ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
360
+ @am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(lmserver_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o lmserver-srilm.obj `if test -f 'srilm.cc'; then $(CYGPATH_W) 'srilm.cc'; else $(CYGPATH_W) '$(srcdir)/srilm.cc'; fi`
361
+ uninstall-info-am:
362
+
363
+ ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
364
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
365
+ unique=`for i in $$list; do \
366
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
367
+ done | \
368
+ $(AWK) ' { files[$$0] = 1; } \
369
+ END { for (i in files) print i; }'`; \
370
+ mkid -fID $$unique
371
+ tags: TAGS
372
+
373
+ TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
374
+ $(TAGS_FILES) $(LISP)
375
+ tags=; \
376
+ here=`pwd`; \
377
+ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
378
+ unique=`for i in $$list; do \
379
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
380
+ done | \
381
+ $(AWK) ' { files[$$0] = 1; } \
382
+ END { for (i in files) print i; }'`; \
383
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
384
+ test -n "$$unique" || unique=$$empty_fix; \
385
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
386
+ $$tags $$unique; \
387
+ fi
388
+ ctags: CTAGS
389
+ CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
390
+ $(TAGS_FILES) $(LISP)
391
+ tags=; \
392
+ here=`pwd`; \
393
+ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
394
+ unique=`for i in $$list; do \
395
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
396
+ done | \
397
+ $(AWK) ' { files[$$0] = 1; } \
398
+ END { for (i in files) print i; }'`; \
399
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
400
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
401
+ $$tags $$unique
402
+
403
+ GTAGS:
404
+ here=`$(am__cd) $(top_builddir) && pwd` \
405
+ && cd $(top_srcdir) \
406
+ && gtags -i $(GTAGS_ARGS) $$here
407
+
408
+ distclean-tags:
409
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
410
+
411
+ distdir: $(DISTFILES)
412
+ $(am__remove_distdir)
413
+ mkdir $(distdir)
414
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
415
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
416
+ list='$(DISTFILES)'; for file in $$list; do \
417
+ case $$file in \
418
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
419
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
420
+ esac; \
421
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
422
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
423
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
424
+ dir="/$$dir"; \
425
+ $(mkdir_p) "$(distdir)$$dir"; \
426
+ else \
427
+ dir=''; \
428
+ fi; \
429
+ if test -d $$d/$$file; then \
430
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
431
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
432
+ fi; \
433
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
434
+ else \
435
+ test -f $(distdir)/$$file \
436
+ || cp -p $$d/$$file $(distdir)/$$file \
437
+ || exit 1; \
438
+ fi; \
439
+ done
440
+ $(MAKE) $(AM_MAKEFLAGS) \
441
+ top_distdir="$(top_distdir)" distdir="$(distdir)" \
442
+ dist-hook
443
+ -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
444
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
445
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
446
+ ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
447
+ || chmod -R a+r $(distdir)
448
+ dist-gzip: distdir
449
+ tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
450
+ $(am__remove_distdir)
451
+
452
+ dist-bzip2: distdir
453
+ tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
454
+ $(am__remove_distdir)
455
+
456
+ dist-tarZ: distdir
457
+ tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
458
+ $(am__remove_distdir)
459
+
460
+ dist-shar: distdir
461
+ shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
462
+ $(am__remove_distdir)
463
+
464
+ dist-zip: distdir
465
+ -rm -f $(distdir).zip
466
+ zip -rq $(distdir).zip $(distdir)
467
+ $(am__remove_distdir)
468
+
469
+ dist dist-all: distdir
470
+ tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
471
+ $(am__remove_distdir)
472
+
473
+ # This target untars the dist file and tries a VPATH configuration. Then
474
+ # it guarantees that the distribution is self-contained by making another
475
+ # tarfile.
476
+ distcheck: dist
477
+ case '$(DIST_ARCHIVES)' in \
478
+ *.tar.gz*) \
479
+ GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\
480
+ *.tar.bz2*) \
481
+ bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\
482
+ *.tar.Z*) \
483
+ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
484
+ *.shar.gz*) \
485
+ GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\
486
+ *.zip*) \
487
+ unzip $(distdir).zip ;;\
488
+ esac
489
+ chmod -R a-w $(distdir); chmod a+w $(distdir)
490
+ mkdir $(distdir)/_build
491
+ mkdir $(distdir)/_inst
492
+ chmod a-w $(distdir)
493
+ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
494
+ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
495
+ && cd $(distdir)/_build \
496
+ && ../configure --srcdir=.. --prefix="$$dc_install_base" \
497
+ $(DISTCHECK_CONFIGURE_FLAGS) \
498
+ && $(MAKE) $(AM_MAKEFLAGS) \
499
+ && $(MAKE) $(AM_MAKEFLAGS) dvi \
500
+ && $(MAKE) $(AM_MAKEFLAGS) check \
501
+ && $(MAKE) $(AM_MAKEFLAGS) install \
502
+ && $(MAKE) $(AM_MAKEFLAGS) installcheck \
503
+ && $(MAKE) $(AM_MAKEFLAGS) uninstall \
504
+ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
505
+ distuninstallcheck \
506
+ && chmod -R a-w "$$dc_install_base" \
507
+ && ({ \
508
+ (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
509
+ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
510
+ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
511
+ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
512
+ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
513
+ } || { rm -rf "$$dc_destdir"; exit 1; }) \
514
+ && rm -rf "$$dc_destdir" \
515
+ && $(MAKE) $(AM_MAKEFLAGS) dist \
516
+ && rm -rf $(DIST_ARCHIVES) \
517
+ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck
518
+ $(am__remove_distdir)
519
+ @(echo "$(distdir) archives ready for distribution: "; \
520
+ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
521
+ sed -e '1{h;s/./=/g;p;x;}' -e '$${p;x;}'
522
+ distuninstallcheck:
523
+ @cd $(distuninstallcheck_dir) \
524
+ && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
525
+ || { echo "ERROR: files left after uninstall:" ; \
526
+ if test -n "$(DESTDIR)"; then \
527
+ echo " (check DESTDIR support)"; \
528
+ fi ; \
529
+ $(distuninstallcheck_listfiles) ; \
530
+ exit 1; } >&2
531
+ distcleancheck: distclean
532
+ @if test '$(srcdir)' = . ; then \
533
+ echo "ERROR: distcleancheck can only run from a VPATH build" ; \
534
+ exit 1 ; \
535
+ fi
536
+ @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
537
+ || { echo "ERROR: files left in build directory after distclean:" ; \
538
+ $(distcleancheck_listfiles) ; \
539
+ exit 1; } >&2
540
+ check-am: all-am
541
+ check: check-am
542
+ all-am: Makefile $(PROGRAMS) config.h
543
+ installdirs:
544
+ for dir in "$(DESTDIR)$(bindir)"; do \
545
+ test -z "$$dir" || $(mkdir_p) "$$dir"; \
546
+ done
547
+ install: install-am
548
+ install-exec: install-exec-am
549
+ install-data: install-data-am
550
+ uninstall: uninstall-am
551
+
552
+ install-am: all-am
553
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
554
+
555
+ installcheck: installcheck-am
556
+ install-strip:
557
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
558
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
559
+ `test -z '$(STRIP)' || \
560
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
561
+ mostlyclean-generic:
562
+
563
+ clean-generic:
564
+
565
+ distclean-generic:
566
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
567
+
568
+ maintainer-clean-generic:
569
+ @echo "This command is intended for maintainers to use"
570
+ @echo "it deletes files that may require special tools to rebuild."
571
+ clean: clean-am
572
+
573
+ clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
574
+
575
+ distclean: distclean-am
576
+ -rm -f $(am__CONFIG_DISTCLEAN_FILES)
577
+ -rm -rf ./$(DEPDIR)
578
+ -rm -f Makefile
579
+ distclean-am: clean-am distclean-compile distclean-generic \
580
+ distclean-hdr distclean-tags
581
+
582
+ dvi: dvi-am
583
+
584
+ dvi-am:
585
+
586
+ html: html-am
587
+
588
+ info: info-am
589
+
590
+ info-am:
591
+
592
+ install-data-am:
593
+
594
+ install-exec-am: install-binPROGRAMS
595
+
596
+ install-info: install-info-am
597
+
598
+ install-man:
599
+
600
+ installcheck-am:
601
+
602
+ maintainer-clean: maintainer-clean-am
603
+ -rm -f $(am__CONFIG_DISTCLEAN_FILES)
604
+ -rm -rf $(top_srcdir)/autom4te.cache
605
+ -rm -rf ./$(DEPDIR)
606
+ -rm -f Makefile
607
+ maintainer-clean-am: distclean-am maintainer-clean-generic
608
+
609
+ mostlyclean: mostlyclean-am
610
+
611
+ mostlyclean-am: mostlyclean-compile mostlyclean-generic
612
+
613
+ pdf: pdf-am
614
+
615
+ pdf-am:
616
+
617
+ ps: ps-am
618
+
619
+ ps-am:
620
+
621
+ uninstall-am: uninstall-binPROGRAMS uninstall-info-am
622
+
623
+ .PHONY: CTAGS GTAGS all all-am am--refresh check check-am clean \
624
+ clean-binPROGRAMS clean-generic ctags dist dist-all dist-bzip2 \
625
+ dist-gzip dist-hook dist-shar dist-tarZ dist-zip distcheck \
626
+ distclean distclean-compile distclean-generic distclean-hdr \
627
+ distclean-tags distcleancheck distdir distuninstallcheck dvi \
628
+ dvi-am html html-am info info-am install install-am \
629
+ install-binPROGRAMS install-data install-data-am install-exec \
630
+ install-exec-am install-info install-info-am install-man \
631
+ install-strip installcheck installcheck-am installdirs \
632
+ maintainer-clean maintainer-clean-generic mostlyclean \
633
+ mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
634
+ tags uninstall uninstall-am uninstall-binPROGRAMS \
635
+ uninstall-info-am
636
+
637
+
638
+ dist-hook:
639
+ rm -rf $(distdir)/doc/.svn/
640
+ rm -rf $(distdir)/scripts/.svn/
641
+ rm -rf $(distdir)/t/.svn/
642
+ rm -rf $(distdir)/t/lib/.svn/
643
+ # Tell versions [3.59,3.63) of GNU make to not export all variables.
644
+ # Otherwise a system limit (for SysV at least) may be exceeded.
645
+ .NOEXPORT:
mosesdecoder/contrib/lmserver/daemon.c ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* $Header: /cvsroot/wikipedia/willow/src/bin/willow/daemon.c,v 1.1 2005/05/02 19:15:21 kateturner Exp $ */
2
+ /* $NetBSD: daemon.c,v 1.9 2003/08/07 16:42:46 agc Exp $ */
3
+ /*-
4
+ * Copyright (c) 1990, 1993
5
+ * The Regents of the University of California. All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions
9
+ * are met:
10
+ * 1. Redistributions of source code must retain the above copyright
11
+ * notice, this list of conditions and the following disclaimer.
12
+ * 2. Redistributions in binary form must reproduce the above copyright
13
+ * notice, this list of conditions and the following disclaimer in the
14
+ * documentation and/or other materials provided with the distribution.
15
+ * 3. Neither the name of the University nor the names of its contributors
16
+ * may be used to endorse or promote products derived from this software
17
+ * without specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
+ * SUCH DAMAGE.
30
+ */
31
+
32
+ #if defined __SUNPRO_C || defined __DECC || defined __HP_cc
33
+ # pragma ident "@(#)$Header: /cvsroot/wikipedia/willow/src/bin/willow/daemon.c,v 1.1 2005/05/02 19:15:21 kateturner Exp $"
34
+ # pragma ident "$NetBSD: daemon.c,v 1.9 2003/08/07 16:42:46 agc Exp $"
35
+ #endif
36
+
37
+ #include <fcntl.h>
38
+ #include <stdlib.h>
39
+ #include <unistd.h>
40
+ /*
+ * daemon -- fork into the background and continue in the child process.
+ *
+ * The calling process forks; the parent exits at once with EXIT_SUCCESS and
+ * the child calls setsid().  Unless `nochdir` is non-zero the child then
+ * changes its working directory to "/".  Unless `noclose` is non-zero,
+ * stdin, stdout and stderr are redirected to /dev/null.
+ *
+ * Returns 0 in the child on success, or -1 if fork() or setsid() fails.
+ * Failures of chdir(), open() and dup2() are ignored and do not change the
+ * return value.
+ */
41
+ int daemon(int nochdir, int noclose)
42
+ {
43
+ int fd; /* descriptor for /dev/null, used only when noclose == 0 */
44
+
45
+ switch (fork()) {
46
+ case -1: /* fork failed: report the error to the caller */
47
+ return (-1);
48
+ case 0: /* child: carry on with the rest of the setup */
49
+ break;
50
+ default: /* parent: exit immediately via _exit() */
51
+ _exit(EXIT_SUCCESS);
52
+ }
53
+
54
+ if (setsid() == -1) /* child could not create a new session */
55
+ return (-1);
56
+
57
+ if (nochdir == 0)
58
+ (void)chdir("/"); /* result deliberately discarded */
59
+
60
+ if (noclose == 0 && (fd = open("/dev/null", O_RDWR, 0)) != -1) { /* redirection is skipped if open() fails */
61
+ (void)dup2(fd, STDIN_FILENO);
62
+ (void)dup2(fd, STDOUT_FILENO);
63
+ (void)dup2(fd, STDERR_FILENO);
64
+ if (fd > STDERR_FILENO) /* only close fd when it is not itself one of the standard descriptors */
65
+ (void)close(fd);
66
+ }
67
+ return (0);
68
+ }
mosesdecoder/contrib/lmserver/lmserver.c ADDED
@@ -0,0 +1,2140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
+ /*
3
+ * memcached - memory caching daemon
4
+ *
5
+ * http://www.danga.com/memcached/
6
+ *
7
+ * Copyright 2003 Danga Interactive, Inc. All rights reserved.
8
+ *
9
+ * Use and distribution licensed under the BSD license. See
10
+ * the LICENSE file for full text.
11
+ *
12
+ * Authors:
13
+ * Anatoly Vorobey <mellon@pobox.com>
14
+ * Brad Fitzpatrick <brad@danga.com>
15
+ *
16
+ * $Id$
17
+ */
18
+ #include "lmserver.h"
19
+ #include "srilm.h"
20
+ #include <sys/stat.h>
21
+ #include <sys/socket.h>
22
+ #include <sys/un.h>
23
+ #include <signal.h>
24
+ #include <sys/resource.h>
25
+ #include <sys/uio.h>
26
+
27
+ /* some POSIX systems need the following definition
28
+ * to get mlockall flags out of sys/mman.h. */
29
+ #ifndef _P1003_1B_VISIBLE
30
+ #define _P1003_1B_VISIBLE
31
+ #endif
32
+ /* need this to get IOV_MAX on some platforms. */
33
+ #ifndef __need_IOV_MAX
34
+ #define __need_IOV_MAX
35
+ #endif
36
+ #include <pwd.h>
37
+ #include <sys/mman.h>
38
+ #include <fcntl.h>
39
+ #include <netinet/tcp.h>
40
+ #include <arpa/inet.h>
41
+ #include <errno.h>
42
+ #include <stdlib.h>
43
+ #include <stdio.h>
44
+ #include <string.h>
45
+ #include <time.h>
46
+ #include <assert.h>
47
+ #include <limits.h>
48
+
49
+ #ifdef HAVE_MALLOC_H
50
+ /* OpenBSD has a malloc.h, but warns to use stdlib.h instead */
51
+ #ifndef __OpenBSD__
52
+ #include <malloc.h>
53
+ #endif
54
+ #endif
55
+
56
+ /* FreeBSD 4.x doesn't have IOV_MAX exposed. */
57
+ #ifndef IOV_MAX
58
+ #if defined(__FreeBSD__) || defined(__APPLE__)
59
+ # define IOV_MAX 1024
60
+ #endif
61
+ #endif
62
+
63
+ /*
64
+ * forward declarations
65
+ */
66
+ static void drive_machine(conn *c);
67
+ static int new_socket(struct addrinfo *ai);
68
+ static int server_socket(const int port, const bool is_udp);
69
+ static int try_read_command(conn *c);
70
+ static int try_read_network(conn *c);
71
+ static int try_read_udp(conn *c);
72
+
73
+ /* stats */
74
+ static void stats_reset(void);
75
+ static void stats_init(void);
76
+
77
+ /* defaults */
78
+ static void settings_init(void);
79
+
80
+ /* event handling, network IO */
81
+ static void event_handler(const int fd, const short which, void *arg);
82
+ static void conn_close(conn *c);
83
+ static void conn_init(void);
84
+ static void accept_new_conns(const bool do_accept);
85
+ static bool update_event(conn *c, const int new_flags);
86
+ static void complete_nread(conn *c);
87
+ static void process_command(conn *c, char *command);
88
+ static int transmit(conn *c);
89
+ static int ensure_iov_space(conn *c);
90
+ static int add_iov(conn *c, const void *buf, int len);
91
+ static int add_msghdr(conn *c);
92
+
93
+ /* time handling */
94
+ static void set_current_time(void); /* update the global variable holding
95
+ global 32-bit seconds-since-start time
96
+ (to avoid 64 bit time_t) */
97
+
98
+ static void conn_free(conn *c);
99
+
100
+ /** exported globals **/
101
+ struct stats stats;
102
+ struct settings settings;
103
+
104
+ /** file scope variables **/
105
+ static item **todelete = NULL;
106
+ static int delcurr;
107
+ static int deltotal;
108
+ static conn *listen_conn = NULL;
109
+ static struct event_base *main_base;
110
+
111
+ #define TRANSMIT_COMPLETE 0
112
+ #define TRANSMIT_INCOMPLETE 1
113
+ #define TRANSMIT_SOFT_ERROR 2
114
+ #define TRANSMIT_HARD_ERROR 3
115
+
116
+ static int *buckets = 0; /* bucket->generation array for a managed instance */
117
+
118
+ #define REALTIME_MAXDELTA 60*60*24*30
119
+ /*
120
+ * given time value that's either unix time or delta from current unix time, return
121
+ * unix time. Use the fact that delta can't exceed one month (and real time value can't
122
+ * be that low).
123
+ */
124
+ static rel_time_t realtime(const time_t exptime) {
125
+ /* no. of seconds in 30 days - largest possible delta exptime */
126
+
127
+ if (exptime == 0) return 0; /* 0 means never expire */
128
+
129
+ if (exptime > REALTIME_MAXDELTA) {
130
+ /* if item expiration is at/before the server started, give it an
131
+ expiration time of 1 second after the server started.
132
+ (because 0 means don't expire). without this, we'd
133
+ underflow and wrap around to some large value way in the
134
+ future, effectively making items expiring in the past
135
+ really expiring never */
136
+ if (exptime <= stats.started)
137
+ return (rel_time_t)1;
138
+ return (rel_time_t)(exptime - stats.started);
139
+ } else {
140
+ return (rel_time_t)(exptime + current_time);
141
+ }
142
+ }
143
+
144
+ static void stats_init(void) {
145
+ stats.curr_items = stats.total_items = stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
146
+ stats.get_cmds = stats.set_cmds = stats.get_hits = stats.get_misses = stats.evictions = 0;
147
+ stats.curr_bytes = stats.bytes_read = stats.bytes_written = 0;
148
+
149
+ /* make the time we started always be 2 seconds before we really
150
+ did, so time(0) - time.started is never zero. if so, things
151
+ like 'settings.oldest_live' which act as booleans as well as
152
+ values are now false in boolean context... */
153
+ stats.started = time(0) - 2;
154
+ }
155
+
156
+ static void stats_reset(void) {
157
+ STATS_LOCK();
158
+ stats.total_items = stats.total_conns = 0;
159
+ stats.get_cmds = stats.set_cmds = stats.get_hits = stats.get_misses = stats.evictions = 0;
160
+ stats.bytes_read = stats.bytes_written = 0;
161
+ STATS_UNLOCK();
162
+ }
163
+
164
+ static void settings_init(void) {
165
+ settings.srilm = NULL;
166
+ settings.srilm_order = 3;
167
+ settings.access=0700;
168
+ settings.port = 11211;
169
+ settings.udpport = 0;
170
+ /* By default this string should be NULL for getaddrinfo() */
171
+ settings.inter = NULL;
172
+ settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
173
+ settings.maxconns = 1024; /* to limit connections-related memory to about 5MB */
174
+ settings.verbose = 0;
175
+ settings.oldest_live = 0;
176
+ settings.evict_to_free = 1; /* push old items out of cache when memory runs out */
177
+ settings.socketpath = NULL; /* by default, not using a unix socket */
178
+ settings.managed = false;
179
+ settings.factor = 1.25;
180
+ settings.chunk_size = 48; /* space for a modest key and value */
181
+ #ifdef USE_THREADS
182
+ settings.num_threads = 4;
183
+ #else
184
+ settings.num_threads = 1;
185
+ #endif
186
+ settings.detail_enabled = 0;
187
+ }
188
+
189
+ /* returns true if a deleted item's delete-locked-time is over, and it
190
+ should be removed from the namespace */
191
+ static bool item_delete_lock_over (item *it) {
192
+ assert(it->it_flags & ITEM_DELETED);
193
+ return (current_time >= it->exptime);
194
+ }
195
+
196
+ /*
197
+ * Adds a message header to a connection.
198
+ *
199
+ * Returns 0 on success, -1 on out-of-memory.
200
+ */
201
+ static int add_msghdr(conn *c)
202
+ {
203
+ struct msghdr *msg;
204
+
205
+ assert(c != NULL);
206
+
207
+ if (c->msgsize == c->msgused) {
208
+ msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
209
+ if (! msg)
210
+ return -1;
211
+ c->msglist = msg;
212
+ c->msgsize *= 2;
213
+ }
214
+
215
+ msg = c->msglist + c->msgused;
216
+
217
+ /* this wipes msg_iovlen, msg_control, msg_controllen, and
218
+ msg_flags, the last 3 of which aren't defined on solaris: */
219
+ memset(msg, 0, sizeof(struct msghdr));
220
+
221
+ msg->msg_iov = &c->iov[c->iovused];
222
+
223
+ if (c->request_addr_size > 0) {
224
+ msg->msg_name = &c->request_addr;
225
+ msg->msg_namelen = c->request_addr_size;
226
+ }
227
+
228
+ c->msgbytes = 0;
229
+ c->msgused++;
230
+
231
+ if (c->udp) {
232
+ /* Leave room for the UDP header, which we'll fill in later. */
233
+ return add_iov(c, NULL, UDP_HEADER_SIZE);
234
+ }
235
+
236
+ return 0;
237
+ }
238
+
239
+
240
+ /*
241
+ * Free list management for connections.
242
+ */
243
+
244
+ static conn **freeconns;
245
+ static int freetotal;
246
+ static int freecurr;
247
+
248
+
249
+ static void conn_init(void) {
250
+ freetotal = 200;
251
+ freecurr = 0;
252
+ if ((freeconns = (conn **)malloc(sizeof(conn *) * freetotal)) == NULL) {
253
+ fprintf(stderr, "malloc()\n");
254
+ }
255
+ return;
256
+ }
257
+
258
+ /*
259
+ * Returns a connection from the freelist, if any. Should call this using
260
+ * conn_from_freelist() for thread safety.
261
+ */
262
+ conn *do_conn_from_freelist() {
263
+ conn *c;
264
+
265
+ if (freecurr > 0) {
266
+ c = freeconns[--freecurr];
267
+ } else {
268
+ c = NULL;
269
+ }
270
+
271
+ return c;
272
+ }
273
+
274
+ /*
275
+ * Adds a connection to the freelist. 0 = success. Should call this using
276
+ * conn_add_to_freelist() for thread safety.
277
+ */
278
+ bool do_conn_add_to_freelist(conn *c) {
279
+ if (freecurr < freetotal) {
280
+ freeconns[freecurr++] = c;
281
+ return false;
282
+ } else {
283
+ /* try to enlarge free connections array */
284
+ conn **new_freeconns = realloc(freeconns, sizeof(conn *) * freetotal * 2);
285
+ if (new_freeconns) {
286
+ freetotal *= 2;
287
+ freeconns = new_freeconns;
288
+ freeconns[freecurr++] = c;
289
+ return false;
290
+ }
291
+ }
292
+ return true;
293
+ }
294
+
295
+ conn *conn_new(const int sfd, const int init_state, const int event_flags,
296
+ const int read_buffer_size, const bool is_udp, struct event_base *base) {
297
+ conn *c = conn_from_freelist();
298
+
299
+ if (NULL == c) {
300
+ if (!(c = (conn *)calloc(1, sizeof(conn)))) {
301
+ fprintf(stderr, "calloc()\n");
302
+ return NULL;
303
+ }
304
+
305
+ c->rbuf = c->wbuf = 0;
306
+ c->ilist = 0;
307
+ c->suffixlist = 0;
308
+ c->iov = 0;
309
+ c->msglist = 0;
310
+ c->hdrbuf = 0;
311
+
312
+ c->rsize = read_buffer_size;
313
+ c->wsize = DATA_BUFFER_SIZE;
314
+ c->isize = ITEM_LIST_INITIAL;
315
+ c->suffixsize = SUFFIX_LIST_INITIAL;
316
+ c->iovsize = IOV_LIST_INITIAL;
317
+ c->msgsize = MSG_LIST_INITIAL;
318
+ c->hdrsize = 0;
319
+
320
+ c->rbuf = (char *)malloc((size_t)c->rsize);
321
+ c->wbuf = (char *)malloc((size_t)c->wsize);
322
+ c->ilist = (item **)malloc(sizeof(item *) * c->isize);
323
+ c->suffixlist = (char **)malloc(sizeof(char *) * c->suffixsize);
324
+ c->iov = (struct iovec *)malloc(sizeof(struct iovec) * c->iovsize);
325
+ c->msglist = (struct msghdr *)malloc(sizeof(struct msghdr) * c->msgsize);
326
+
327
+ if (c->rbuf == 0 || c->wbuf == 0 || c->ilist == 0 || c->iov == 0 ||
328
+ c->msglist == 0 || c->suffixlist == 0) {
329
+ conn_free(c);
330
+ fprintf(stderr, "malloc()\n");
331
+ return NULL;
332
+ }
333
+
334
+ STATS_LOCK();
335
+ stats.conn_structs++;
336
+ STATS_UNLOCK();
337
+ }
338
+
339
+ if (settings.verbose > 1) {
340
+ if (init_state == conn_listening)
341
+ fprintf(stderr, "<%d server listening\n", sfd);
342
+ else if (is_udp)
343
+ fprintf(stderr, "<%d server listening (udp)\n", sfd);
344
+ else
345
+ fprintf(stderr, "<%d new client connection\n", sfd);
346
+ }
347
+
348
+ c->sfd = sfd;
349
+ c->udp = is_udp;
350
+ c->state = init_state;
351
+ c->rlbytes = 0;
352
+ c->rbytes = c->wbytes = 0;
353
+ c->wcurr = c->wbuf;
354
+ c->rcurr = c->rbuf;
355
+ c->ritem = 0;
356
+ c->icurr = c->ilist;
357
+ c->suffixcurr = c->suffixlist;
358
+ c->ileft = 0;
359
+ c->suffixleft = 0;
360
+ c->iovused = 0;
361
+ c->msgcurr = 0;
362
+ c->msgused = 0;
363
+
364
+ c->write_and_go = conn_read;
365
+ c->write_and_free = 0;
366
+ c->item = 0;
367
+ c->bucket = -1;
368
+ c->gen = 0;
369
+
370
+ c->noreply = false;
371
+
372
+ event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
373
+ event_base_set(base, &c->event);
374
+ c->ev_flags = event_flags;
375
+
376
+ if (event_add(&c->event, 0) == -1) {
377
+ if (conn_add_to_freelist(c)) {
378
+ conn_free(c);
379
+ }
380
+ perror("event_add");
381
+ return NULL;
382
+ }
383
+
384
+ STATS_LOCK();
385
+ stats.curr_conns++;
386
+ stats.total_conns++;
387
+ STATS_UNLOCK();
388
+
389
+ return c;
390
+ }
391
+
392
+ static void conn_cleanup(conn *c) {
393
+ assert(c != NULL);
394
+
395
+ if (c->write_and_free) {
396
+ free(c->write_and_free);
397
+ c->write_and_free = 0;
398
+ }
399
+ }
400
+
401
+ /*
402
+ * Frees a connection.
403
+ */
404
+ void conn_free(conn *c) {
405
+ if (c) {
406
+ if (c->hdrbuf)
407
+ free(c->hdrbuf);
408
+ if (c->msglist)
409
+ free(c->msglist);
410
+ if (c->rbuf)
411
+ free(c->rbuf);
412
+ if (c->wbuf)
413
+ free(c->wbuf);
414
+ if (c->ilist)
415
+ free(c->ilist);
416
+ if (c->suffixlist)
417
+ free(c->suffixlist);
418
+ if (c->iov)
419
+ free(c->iov);
420
+ free(c);
421
+ }
422
+ }
423
+
424
+ static void conn_close(conn *c) {
425
+ assert(c != NULL);
426
+
427
+ /* delete the event, the socket and the conn */
428
+ event_del(&c->event);
429
+
430
+ if (settings.verbose > 1)
431
+ fprintf(stderr, "<%d connection closed.\n", c->sfd);
432
+
433
+ close(c->sfd);
434
+ accept_new_conns(true);
435
+ conn_cleanup(c);
436
+
437
+ /* if the connection has big buffers, just free it */
438
+ if (c->rsize > READ_BUFFER_HIGHWAT || conn_add_to_freelist(c)) {
439
+ conn_free(c);
440
+ }
441
+
442
+ STATS_LOCK();
443
+ stats.curr_conns--;
444
+ STATS_UNLOCK();
445
+
446
+ return;
447
+ }
448
+
449
+
450
+ /*
451
+ * Shrinks a connection's buffers if they're too big. This prevents
452
+ * periodic large "get" requests from permanently chewing lots of server
453
+ * memory.
454
+ *
455
+ * This should only be called in between requests since it can wipe output
456
+ * buffers!
457
+ */
458
+ static void conn_shrink(conn *c) {
459
+ assert(c != NULL);
460
+
461
+ if (c->udp)
462
+ return;
463
+
464
+ if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
465
+ char *newbuf;
466
+
467
+ if (c->rcurr != c->rbuf)
468
+ memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
469
+
470
+ newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE);
471
+
472
+ if (newbuf) {
473
+ c->rbuf = newbuf;
474
+ c->rsize = DATA_BUFFER_SIZE;
475
+ }
476
+ /* TODO check other branch... */
477
+ c->rcurr = c->rbuf;
478
+ }
479
+
480
+ if (c->isize > ITEM_LIST_HIGHWAT) {
481
+ item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
482
+ if (newbuf) {
483
+ c->ilist = newbuf;
484
+ c->isize = ITEM_LIST_INITIAL;
485
+ }
486
+ /* TODO check error condition? */
487
+ }
488
+
489
+ if (c->msgsize > MSG_LIST_HIGHWAT) {
490
+ struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
491
+ if (newbuf) {
492
+ c->msglist = newbuf;
493
+ c->msgsize = MSG_LIST_INITIAL;
494
+ }
495
+ /* TODO check error condition? */
496
+ }
497
+
498
+ if (c->iovsize > IOV_LIST_HIGHWAT) {
499
+ struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
500
+ if (newbuf) {
501
+ c->iov = newbuf;
502
+ c->iovsize = IOV_LIST_INITIAL;
503
+ }
504
+ /* TODO check return value */
505
+ }
506
+ }
507
+
508
+ /*
509
+ * Sets a connection's current state in the state machine. Any special
510
+ * processing that needs to happen on certain state transitions can
511
+ * happen here.
512
+ */
513
+ static void conn_set_state(conn *c, int state) {
514
+ assert(c != NULL);
515
+
516
+ if (state != c->state) {
517
+ if (state == conn_read) {
518
+ conn_shrink(c);
519
+ //assoc_move_next_bucket();
520
+ }
521
+ c->state = state;
522
+ }
523
+ }
524
+
525
+ /*
526
+ * Ensures that there is room for another struct iovec in a connection's
527
+ * iov list.
528
+ *
529
+ * Returns 0 on success, -1 on out-of-memory.
530
+ */
531
+ static int ensure_iov_space(conn *c) {
532
+ assert(c != NULL);
533
+
534
+ if (c->iovused >= c->iovsize) {
535
+ int i, iovnum;
536
+ struct iovec *new_iov = (struct iovec *)realloc(c->iov,
537
+ (c->iovsize * 2) * sizeof(struct iovec));
538
+ if (! new_iov)
539
+ return -1;
540
+ c->iov = new_iov;
541
+ c->iovsize *= 2;
542
+
543
+ /* Point all the msghdr structures at the new list. */
544
+ for (i = 0, iovnum = 0; i < c->msgused; i++) {
545
+ c->msglist[i].msg_iov = &c->iov[iovnum];
546
+ iovnum += c->msglist[i].msg_iovlen;
547
+ }
548
+ }
549
+
550
+ return 0;
551
+ }
552
+
553
+
554
+ /*
555
+ * Adds data to the list of pending data that will be written out to a
556
+ * connection.
557
+ *
558
+ * Returns 0 on success, -1 on out-of-memory.
559
+ */
560
+
561
+ static int add_iov(conn *c, const void *buf, int len) {
562
+ struct msghdr *m;
563
+ int leftover;
564
+ bool limit_to_mtu;
565
+
566
+ assert(c != NULL);
567
+
568
+ do {
569
+ m = &c->msglist[c->msgused - 1];
570
+
571
+ /*
572
+ * Limit UDP packets, and the first payloads of TCP replies, to
573
+ * UDP_MAX_PAYLOAD_SIZE bytes.
574
+ */
575
+ limit_to_mtu = c->udp || (1 == c->msgused);
576
+
577
+ /* We may need to start a new msghdr if this one is full. */
578
+ if (m->msg_iovlen == IOV_MAX ||
579
+ (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) {
580
+ add_msghdr(c);
581
+ m = &c->msglist[c->msgused - 1];
582
+ }
583
+
584
+ if (ensure_iov_space(c) != 0)
585
+ return -1;
586
+
587
+ /* If the fragment is too big to fit in the datagram, split it up */
588
+ if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) {
589
+ leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE;
590
+ len -= leftover;
591
+ } else {
592
+ leftover = 0;
593
+ }
594
+
595
+ m = &c->msglist[c->msgused - 1];
596
+ m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
597
+ m->msg_iov[m->msg_iovlen].iov_len = len;
598
+
599
+ c->msgbytes += len;
600
+ c->iovused++;
601
+ m->msg_iovlen++;
602
+
603
+ buf = ((char *)buf) + len;
604
+ len = leftover;
605
+ } while (leftover > 0);
606
+
607
+ return 0;
608
+ }
609
+
610
+
611
+ /*
612
+ * Constructs a set of UDP headers and attaches them to the outgoing messages.
613
+ */
614
+ static int build_udp_headers(conn *c) {
615
+ int i;
616
+ unsigned char *hdr;
617
+
618
+ assert(c != NULL);
619
+
620
+ if (c->msgused > c->hdrsize) {
621
+ void *new_hdrbuf;
622
+ if (c->hdrbuf)
623
+ new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE);
624
+ else
625
+ new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE);
626
+ if (! new_hdrbuf)
627
+ return -1;
628
+ c->hdrbuf = (unsigned char *)new_hdrbuf;
629
+ c->hdrsize = c->msgused * 2;
630
+ }
631
+
632
+ hdr = c->hdrbuf;
633
+ for (i = 0; i < c->msgused; i++) {
634
+ c->msglist[i].msg_iov[0].iov_base = hdr;
635
+ c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
636
+ *hdr++ = c->request_id / 256;
637
+ *hdr++ = c->request_id % 256;
638
+ *hdr++ = i / 256;
639
+ *hdr++ = i % 256;
640
+ *hdr++ = c->msgused / 256;
641
+ *hdr++ = c->msgused % 256;
642
+ *hdr++ = 0;
643
+ *hdr++ = 0;
644
+ assert((void *) hdr == (void *)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
645
+ }
646
+
647
+ return 0;
648
+ }
649
+
650
+
651
+ static void out_string(conn *c, const char *str) {
652
+ size_t len;
653
+
654
+ assert(c != NULL);
655
+
656
+ if (c->noreply) {
657
+ if (settings.verbose > 1)
658
+ fprintf(stderr, ">%d NOREPLY %s\n", c->sfd, str);
659
+ c->noreply = false;
660
+ conn_set_state(c, conn_read);
661
+ return;
662
+ }
663
+
664
+ if (settings.verbose > 1)
665
+ fprintf(stderr, ">%d %s\n", c->sfd, str);
666
+
667
+ len = strlen(str);
668
+ if ((len + 2) > c->wsize) {
669
+ /* ought to be always enough. just fail for simplicity */
670
+ str = "SERVER_ERROR output line too long";
671
+ len = strlen(str);
672
+ }
673
+
674
+ memcpy(c->wbuf, str, len);
675
+ memcpy(c->wbuf + len, "\r\n", 2);
676
+ c->wbytes = len + 2;
677
+ c->wcurr = c->wbuf;
678
+
679
+ conn_set_state(c, conn_write);
680
+ c->write_and_go = conn_read;
681
+ return;
682
+ }
683
+
684
/* One token of a command line: a pointer into the command buffer plus the
 * token's length (the terminal token has length 0). */
typedef struct token_s {
    char *value;
    size_t length;
} token_t;

#define COMMAND_TOKEN 0
#define SUBCOMMAND_TOKEN 1
#define KEY_TOKEN 1
#define KEY_MAX_LENGTH 250

#define MAX_TOKENS 8

/*
 * Splits a command string in place on single spaces.  The space that ends
 * each token is overwritten with '\0', and tokens[] receives a pointer to
 * the start of each token together with its length.  Runs of spaces produce
 * no empty tokens.
 *
 * At most max_tokens - 1 real tokens are produced; they are followed by one
 * terminal token of length zero.  The terminal token's value is NULL when
 * the whole string was consumed, otherwise it points at the first
 * unprocessed character.
 *
 * Returns the total number of entries written, terminal token included
 * (so the result is always at least 1).
 *
 * Usage example:
 *
 *   token_t tokens[MAX_TOKENS];
 *   size_t n = tokenize_command(command, tokens, MAX_TOKENS);
 *   for (size_t ix = 0; tokens[ix].length != 0; ix++) {
 *       ...
 *   }
 */
static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) {
    char *start;    /* first character of the token being scanned */
    char *cur;      /* scan position */
    size_t count = 0;

    assert(command != NULL && tokens != NULL && max_tokens > 1);

    start = cur = command;
    while (count < max_tokens - 1) {
        if (*cur == '\0') {
            /* end of input: emit the trailing token, if there is one */
            if (cur != start) {
                tokens[count].value = start;
                tokens[count].length = cur - start;
                count++;
            }
            break;
        }

        if (*cur == ' ') {
            if (cur != start) {
                tokens[count].value = start;
                tokens[count].length = cur - start;
                count++;
                *cur = '\0';
            }
            start = cur + 1;
        }

        cur++;
    }

    /*
     * Terminal token: NULL when the scan stopped on the string terminator,
     * otherwise the first character that was not processed.
     */
    if (*cur == '\0') {
        tokens[count].value = NULL;
    } else {
        tokens[count].value = cur;
    }
    tokens[count].length = 0;
    count++;

    return count;
}
750
+
751
+ /* set up a connection to write a buffer then free it, used for stats */
752
+ static void write_and_free(conn *c, char *buf, int bytes) {
753
+ if (buf) {
754
+ c->write_and_free = buf;
755
+ c->wcurr = buf;
756
+ c->wbytes = bytes;
757
+ conn_set_state(c, conn_write);
758
+ c->write_and_go = conn_read;
759
+ } else {
760
+ out_string(c, "SERVER_ERROR out of memory writing stats");
761
+ }
762
+ }
763
+
764
+ static inline void set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens)
765
+ {
766
+ int noreply_index = ntokens - 2;
767
+
768
+ /*
769
+ NOTE: this function is not the first place where we are going to
770
+ send the reply. We could send it instead from process_command()
771
+ if the request line has wrong number of tokens. However parsing
772
+ malformed line for "noreply" option is not reliable anyway, so
773
+ it can't be helped.
774
+ */
775
+ if (tokens[noreply_index].value
776
+ && strcmp(tokens[noreply_index].value, "noreply") == 0) {
777
+ c->noreply = true;
778
+ }
779
+ }
780
+
781
+ inline static void process_stats_detail(conn *c, const char *command) {
782
+ assert(c != NULL);
783
+
784
+ if (strcmp(command, "on") == 0) {
785
+ settings.detail_enabled = 1;
786
+ out_string(c, "OK");
787
+ }
788
+ else if (strcmp(command, "off") == 0) {
789
+ settings.detail_enabled = 0;
790
+ out_string(c, "OK");
791
+ } else {
792
+ out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump");
793
+ }
794
+ }
795
+
796
+ static void process_stat(conn *c, token_t *tokens, const size_t ntokens) {
797
+ rel_time_t now = current_time;
798
+ char *command;
799
+ char *subcommand;
800
+
801
+ assert(c != NULL);
802
+
803
+ if(ntokens < 2) {
804
+ out_string(c, "CLIENT_ERROR bad command line");
805
+ return;
806
+ }
807
+
808
+ command = tokens[COMMAND_TOKEN].value;
809
+
810
+ if (ntokens == 2 && strcmp(command, "stats") == 0) {
811
+ char temp[1024];
812
+ pid_t pid = getpid();
813
+ char *pos = temp;
814
+
815
+ #ifndef WIN32
816
+ struct rusage usage;
817
+ getrusage(RUSAGE_SELF, &usage);
818
+ #endif /* !WIN32 */
819
+
820
+ STATS_LOCK();
821
+ pos += sprintf(pos, "STAT pid %u\r\n", pid);
822
+ pos += sprintf(pos, "STAT uptime %u\r\n", now);
823
+ pos += sprintf(pos, "STAT time %ld\r\n", now + stats.started);
824
+ pos += sprintf(pos, "STAT version " VERSION "\r\n");
825
+ pos += sprintf(pos, "STAT pointer_size %d\r\n", 8 * sizeof(void *));
826
+ #ifndef WIN32
827
+ pos += sprintf(pos, "STAT rusage_user %ld.%06ld\r\n", usage.ru_utime.tv_sec, usage.ru_utime.tv_usec);
828
+ pos += sprintf(pos, "STAT rusage_system %ld.%06ld\r\n", usage.ru_stime.tv_sec, usage.ru_stime.tv_usec);
829
+ #endif /* !WIN32 */
830
+ pos += sprintf(pos, "STAT curr_items %u\r\n", stats.curr_items);
831
+ pos += sprintf(pos, "STAT total_items %u\r\n", stats.total_items);
832
+ pos += sprintf(pos, "STAT bytes %llu\r\n", stats.curr_bytes);
833
+ pos += sprintf(pos, "STAT curr_connections %u\r\n", stats.curr_conns - 1); /* ignore listening conn */
834
+ pos += sprintf(pos, "STAT total_connections %u\r\n", stats.total_conns);
835
+ pos += sprintf(pos, "STAT connection_structures %u\r\n", stats.conn_structs);
836
+ pos += sprintf(pos, "STAT cmd_get %llu\r\n", stats.get_cmds);
837
+ pos += sprintf(pos, "STAT cmd_set %llu\r\n", stats.set_cmds);
838
+ pos += sprintf(pos, "STAT get_hits %llu\r\n", stats.get_hits);
839
+ pos += sprintf(pos, "STAT get_misses %llu\r\n", stats.get_misses);
840
+ pos += sprintf(pos, "STAT evictions %llu\r\n", stats.evictions);
841
+ pos += sprintf(pos, "STAT bytes_read %llu\r\n", stats.bytes_read);
842
+ pos += sprintf(pos, "STAT bytes_written %llu\r\n", stats.bytes_written);
843
+ pos += sprintf(pos, "STAT limit_maxbytes %llu\r\n", (uint64_t) settings.maxbytes);
844
+ pos += sprintf(pos, "STAT threads %u\r\n", settings.num_threads);
845
+ pos += sprintf(pos, "END");
846
+ STATS_UNLOCK();
847
+ out_string(c, temp);
848
+ return;
849
+ }
850
+
851
+ subcommand = tokens[SUBCOMMAND_TOKEN].value;
852
+
853
+ if (strcmp(subcommand, "reset") == 0) {
854
+ stats_reset();
855
+ out_string(c, "RESET");
856
+ return;
857
+ }
858
+
859
+ #ifdef HAVE_MALLOC_H
860
+ #ifdef HAVE_STRUCT_MALLINFO
861
+ if (strcmp(subcommand, "malloc") == 0) {
862
+ char temp[512];
863
+ struct mallinfo info;
864
+ char *pos = temp;
865
+
866
+ info = mallinfo();
867
+ pos += sprintf(pos, "STAT arena_size %d\r\n", info.arena);
868
+ pos += sprintf(pos, "STAT free_chunks %d\r\n", info.ordblks);
869
+ pos += sprintf(pos, "STAT fastbin_blocks %d\r\n", info.smblks);
870
+ pos += sprintf(pos, "STAT mmapped_regions %d\r\n", info.hblks);
871
+ pos += sprintf(pos, "STAT mmapped_space %d\r\n", info.hblkhd);
872
+ pos += sprintf(pos, "STAT max_total_alloc %d\r\n", info.usmblks);
873
+ pos += sprintf(pos, "STAT fastbin_space %d\r\n", info.fsmblks);
874
+ pos += sprintf(pos, "STAT total_alloc %d\r\n", info.uordblks);
875
+ pos += sprintf(pos, "STAT total_free %d\r\n", info.fordblks);
876
+ pos += sprintf(pos, "STAT releasable_space %d\r\nEND", info.keepcost);
877
+ out_string(c, temp);
878
+ return;
879
+ }
880
+ #endif /* HAVE_STRUCT_MALLINFO */
881
+ #endif /* HAVE_MALLOC_H */
882
+ out_string(c, "ERROR");
883
+ }
884
+
885
+ static inline void process_srilm_command(conn *c, token_t *tokens, size_t ntokens) {
886
+ int context[6];
887
+ int i = 1;
888
+ int j = ntokens - 3;
889
+ while (tokens[i].length) {
890
+ context[i-1] = srilm_getvoc(tokens[i].value);
891
+ ++i;
892
+ }
893
+ float p = -999.0f;
894
+ if (context[0] != -1) {
895
+ context[i-1] = -1;
896
+ p = srilm_wordprob(context[0], &context[1]);
897
+ }
898
+
899
+ memcpy(c->wbuf, &p, sizeof(float));
900
+ memcpy(c->wbuf + sizeof(float), "\r\n", 2);
901
+ c->wbytes = sizeof(float) + 2;
902
+ c->wcurr = c->wbuf;
903
+
904
+ conn_set_state(c, conn_write);
905
+ c->write_and_go = conn_read;
906
+ }
907
+
908
+ static void process_command(conn *c, char *command) {
909
+
910
+ token_t tokens[MAX_TOKENS];
911
+ size_t ntokens;
912
+ int comm;
913
+
914
+ assert(c != NULL);
915
+
916
+ if (settings.verbose > 1)
917
+ fprintf(stderr, "<%d %s\n", c->sfd, command);
918
+
919
+ /*
920
+ * for commands set/add/replace, we build an item and read the data
921
+ * directly into it, then continue in nread_complete().
922
+ */
923
+
924
+ c->msgcurr = 0;
925
+ c->msgused = 0;
926
+ c->iovused = 0;
927
+ if (add_msghdr(c) != 0) {
928
+ out_string(c, "SERVER_ERROR out of memory preparing response");
929
+ return;
930
+ }
931
+
932
+ ntokens = tokenize_command(command, tokens, MAX_TOKENS);
933
+ if (ntokens >1 &&
934
+ strcmp(tokens[COMMAND_TOKEN].value, "prob") == 0) {
935
+ process_srilm_command(c, tokens, ntokens);
936
+ } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) {
937
+
938
+ process_stat(c, tokens, ntokens);
939
+
940
+ } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) {
941
+
942
+ out_string(c, "VERSION " VERSION);
943
+
944
+ } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) {
945
+
946
+ conn_set_state(c, conn_closing);
947
+
948
+ } else {
949
+ out_string(c, "ERROR");
950
+ }
951
+ return;
952
+ }
953
+
954
+ /*
955
+ * if we have a complete line in the buffer, process it.
956
+ */
957
+ static int try_read_command(conn *c) {
958
+ char *el, *cont;
959
+
960
+ assert(c != NULL);
961
+ assert(c->rcurr <= (c->rbuf + c->rsize));
962
+
963
+ if (c->rbytes == 0)
964
+ return 0;
965
+ el = memchr(c->rcurr, '\n', c->rbytes);
966
+ if (!el)
967
+ return 0;
968
+ cont = el + 1;
969
+ if ((el - c->rcurr) > 1 && *(el - 1) == '\r') {
970
+ el--;
971
+ }
972
+ *el = '\0';
973
+
974
+ assert(cont <= (c->rcurr + c->rbytes));
975
+
976
+ process_command(c, c->rcurr);
977
+
978
+ c->rbytes -= (cont - c->rcurr);
979
+ c->rcurr = cont;
980
+
981
+ assert(c->rcurr <= (c->rbuf + c->rsize));
982
+
983
+ return 1;
984
+ }
985
+
986
+ /*
987
+ * read a UDP request.
988
+ * return 0 if there's nothing to read.
989
+ */
990
+ static int try_read_udp(conn *c) {
991
+ int res;
992
+
993
+ assert(c != NULL);
994
+
995
+ c->request_addr_size = sizeof(c->request_addr);
996
+ res = recvfrom(c->sfd, c->rbuf, c->rsize,
997
+ 0, &c->request_addr, &c->request_addr_size);
998
+ if (res > 8) {
999
+ unsigned char *buf = (unsigned char *)c->rbuf;
1000
+ STATS_LOCK();
1001
+ stats.bytes_read += res;
1002
+ STATS_UNLOCK();
1003
+
1004
+ /* Beginning of UDP packet is the request ID; save it. */
1005
+ c->request_id = buf[0] * 256 + buf[1];
1006
+
1007
+ /* If this is a multi-packet request, drop it. */
1008
+ if (buf[4] != 0 || buf[5] != 1) {
1009
+ out_string(c, "SERVER_ERROR multi-packet request not supported");
1010
+ return 0;
1011
+ }
1012
+
1013
+ /* Don't care about any of the rest of the header. */
1014
+ res -= 8;
1015
+ memmove(c->rbuf, c->rbuf + 8, res);
1016
+
1017
+ c->rbytes += res;
1018
+ c->rcurr = c->rbuf;
1019
+ return 1;
1020
+ }
1021
+ return 0;
1022
+ }
1023
+
1024
+ /*
1025
+ * read from network as much as we can, handle buffer overflow and connection
1026
+ * close.
1027
+ * before reading, move the remaining incomplete fragment of a command
1028
+ * (if any) to the beginning of the buffer.
1029
+ * return 0 if there's nothing to read on the first read.
1030
+ */
1031
+ static int try_read_network(conn *c) {
1032
+ int gotdata = 0;
1033
+ int res;
1034
+
1035
+ assert(c != NULL);
1036
+
1037
+ if (c->rcurr != c->rbuf) {
1038
+ if (c->rbytes != 0) /* otherwise there's nothing to copy */
1039
+ memmove(c->rbuf, c->rcurr, c->rbytes);
1040
+ c->rcurr = c->rbuf;
1041
+ }
1042
+
1043
+ while (1) {
1044
+ if (c->rbytes >= c->rsize) {
1045
+ char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
1046
+ if (!new_rbuf) {
1047
+ if (settings.verbose > 0)
1048
+ fprintf(stderr, "Couldn't realloc input buffer\n");
1049
+ c->rbytes = 0; /* ignore what we read */
1050
+ out_string(c, "SERVER_ERROR out of memory reading request");
1051
+ c->write_and_go = conn_closing;
1052
+ return 1;
1053
+ }
1054
+ c->rcurr = c->rbuf = new_rbuf;
1055
+ c->rsize *= 2;
1056
+ }
1057
+
1058
+ /* unix socket mode doesn't need this, so zeroed out. but why
1059
+ * is this done for every command? presumably for UDP
1060
+ * mode. */
1061
+ if (!settings.socketpath) {
1062
+ c->request_addr_size = sizeof(c->request_addr);
1063
+ } else {
1064
+ c->request_addr_size = 0;
1065
+ }
1066
+
1067
+ int avail = c->rsize - c->rbytes;
1068
+ res = read(c->sfd, c->rbuf + c->rbytes, avail);
1069
+ if (res > 0) {
1070
+ STATS_LOCK();
1071
+ stats.bytes_read += res;
1072
+ STATS_UNLOCK();
1073
+ gotdata = 1;
1074
+ c->rbytes += res;
1075
+ if (res == avail) {
1076
+ continue;
1077
+ } else {
1078
+ break;
1079
+ }
1080
+ }
1081
+ if (res == 0) {
1082
+ /* connection closed */
1083
+ conn_set_state(c, conn_closing);
1084
+ return 1;
1085
+ }
1086
+ if (res == -1) {
1087
+ if (errno == EAGAIN || errno == EWOULDBLOCK) break;
1088
+ /* Should close on unhandled errors. */
1089
+ conn_set_state(c, conn_closing);
1090
+ return 1;
1091
+ }
1092
+ }
1093
+ return gotdata;
1094
+ }
1095
+
1096
+ static bool update_event(conn *c, const int new_flags) {
1097
+ assert(c != NULL);
1098
+
1099
+ struct event_base *base = c->event.ev_base;
1100
+ if (c->ev_flags == new_flags)
1101
+ return true;
1102
+ if (event_del(&c->event) == -1) return false;
1103
+ event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
1104
+ event_base_set(base, &c->event);
1105
+ c->ev_flags = new_flags;
1106
+ if (event_add(&c->event, 0) == -1) return false;
1107
+ return true;
1108
+ }
1109
+
1110
+ /*
1111
+ * Sets whether we are listening for new connections or not.
1112
+ */
1113
+ void accept_new_conns(const bool do_accept) {
1114
+ conn *next;
1115
+
1116
+ if (! is_listen_thread())
1117
+ return;
1118
+
1119
+ for (next = listen_conn; next; next = next->next) {
1120
+ if (do_accept) {
1121
+ update_event(next, EV_READ | EV_PERSIST);
1122
+ if (listen(next->sfd, 1024) != 0) {
1123
+ perror("listen");
1124
+ }
1125
+ }
1126
+ else {
1127
+ update_event(next, 0);
1128
+ if (listen(next->sfd, 0) != 0) {
1129
+ perror("listen");
1130
+ }
1131
+ }
1132
+ }
1133
+ }
1134
+
1135
+
1136
+ /*
1137
+ * Transmit the next chunk of data from our list of msgbuf structures.
1138
+ *
1139
+ * Returns:
1140
+ * TRANSMIT_COMPLETE All done writing.
1141
+ * TRANSMIT_INCOMPLETE More data remaining to write.
1142
+ * TRANSMIT_SOFT_ERROR Can't write any more right now.
1143
+ * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
1144
+ */
1145
+ static int transmit(conn *c) {
1146
+ assert(c != NULL);
1147
+
1148
+ if (c->msgcurr < c->msgused &&
1149
+ c->msglist[c->msgcurr].msg_iovlen == 0) {
1150
+ /* Finished writing the current msg; advance to the next. */
1151
+ c->msgcurr++;
1152
+ }
1153
+ if (c->msgcurr < c->msgused) {
1154
+ ssize_t res;
1155
+ struct msghdr *m = &c->msglist[c->msgcurr];
1156
+
1157
+ res = sendmsg(c->sfd, m, 0);
1158
+ if (res > 0) {
1159
+ STATS_LOCK();
1160
+ stats.bytes_written += res;
1161
+ STATS_UNLOCK();
1162
+
1163
+ /* We've written some of the data. Remove the completed
1164
+ iovec entries from the list of pending writes. */
1165
+ while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) {
1166
+ res -= m->msg_iov->iov_len;
1167
+ m->msg_iovlen--;
1168
+ m->msg_iov++;
1169
+ }
1170
+
1171
+ /* Might have written just part of the last iovec entry;
1172
+ adjust it so the next write will do the rest. */
1173
+ if (res > 0) {
1174
+ m->msg_iov->iov_base += res;
1175
+ m->msg_iov->iov_len -= res;
1176
+ }
1177
+ return TRANSMIT_INCOMPLETE;
1178
+ }
1179
+ if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
1180
+ if (!update_event(c, EV_WRITE | EV_PERSIST)) {
1181
+ if (settings.verbose > 0)
1182
+ fprintf(stderr, "Couldn't update event\n");
1183
+ conn_set_state(c, conn_closing);
1184
+ return TRANSMIT_HARD_ERROR;
1185
+ }
1186
+ return TRANSMIT_SOFT_ERROR;
1187
+ }
1188
+ /* if res==0 or res==-1 and error is not EAGAIN or EWOULDBLOCK,
1189
+ we have a real error, on which we close the connection */
1190
+ if (settings.verbose > 0)
1191
+ perror("Failed to write, and not due to blocking");
1192
+
1193
+ if (c->udp)
1194
+ conn_set_state(c, conn_read);
1195
+ else
1196
+ conn_set_state(c, conn_closing);
1197
+ return TRANSMIT_HARD_ERROR;
1198
+ } else {
1199
+ return TRANSMIT_COMPLETE;
1200
+ }
1201
+ }
1202
+
1203
/*
 * Run the connection state machine for c.
 *
 * Repeatedly dispatches on c->state until a state handler sets `stop`,
 * which happens when the handler must wait for the next socket event
 * (would-block conditions), after an accept() failure, or once the
 * connection has been closed/cleaned up.
 */
static void drive_machine(conn *c) {
    bool stop = false;
    int sfd, flags = 1;
    socklen_t addrlen;
    struct sockaddr_storage addr;
    int res;

    assert(c != NULL);

    while (!stop) {

        switch(c->state) {
        case conn_listening:
            /* Accept one pending connection and hand it to a worker. */
            addrlen = sizeof(addr);
            if ((sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen)) == -1) {
                if (errno == EAGAIN || errno == EWOULDBLOCK) {
                    /* these are transient, so don't log anything */
                    stop = true;
                } else if (errno == EMFILE) {
                    /* Out of descriptors: stop accepting new connections. */
                    if (settings.verbose > 0)
                        fprintf(stderr, "Too many open connections\n");
                    accept_new_conns(false);
                    stop = true;
                } else {
                    perror("accept()");
                    stop = true;
                }
                break;
            }
            /* The accepted socket must be non-blocking like the listener. */
            if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 ||
                fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
                perror("setting O_NONBLOCK");
                close(sfd);
                break;
            }
            dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                              DATA_BUFFER_SIZE, false);
            break;

        case conn_read:
            /* First drain buffered commands, then try to read more input. */
            if (try_read_command(c) != 0) {
                continue;
            }
            if ((c->udp ? try_read_udp(c) : try_read_network(c)) != 0) {
                continue;
            }
            /* we have no command line and no data to read from network */
            if (!update_event(c, EV_READ | EV_PERSIST)) {
                if (settings.verbose > 0)
                    fprintf(stderr, "Couldn't update event\n");
                conn_set_state(c, conn_closing);
                break;
            }
            stop = true;
            break;

        case conn_nread:
            /* This state is never expected here; trip the assertion. */
            assert(!"nread should not be possible");
            break;

        case conn_swallow:
            /* we are reading sbytes and throwing them away */
            if (c->sbytes == 0) {
                conn_set_state(c, conn_read);
                break;
            }

            /* first check if we have leftovers in the conn_read buffer */
            if (c->rbytes > 0) {
                int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
                c->sbytes -= tocopy;
                c->rcurr += tocopy;
                c->rbytes -= tocopy;
                break;
            }

            /*  now try reading from the socket */
            /* Read at most sbytes, capped at the buffer size; the data
               is discarded, only the remaining count is updated. */
            res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? c->sbytes : c->rsize);
            if (res > 0) {
                STATS_LOCK();
                stats.bytes_read += res;
                STATS_UNLOCK();
                c->sbytes -= res;
                break;
            }
            if (res == 0) { /* end of stream */
                conn_set_state(c, conn_closing);
                break;
            }
            if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
                if (!update_event(c, EV_READ | EV_PERSIST)) {
                    if (settings.verbose > 0)
                        fprintf(stderr, "Couldn't update event\n");
                    conn_set_state(c, conn_closing);
                    break;
                }
                stop = true;
                break;
            }
            /* otherwise we have a real error, on which we close the connection */
            if (settings.verbose > 0)
                fprintf(stderr, "Failed to read, and not due to blocking\n");
            conn_set_state(c, conn_closing);
            break;

        case conn_write:
            /*
             * We want to write out a simple response. If we haven't already,
             * assemble it into a msgbuf list (this will be a single-entry
             * list for TCP or a two-entry list for UDP).
             */
            if (c->iovused == 0 || (c->udp && c->iovused == 1)) {
                if (add_iov(c, c->wcurr, c->wbytes) != 0 ||
                    (c->udp && build_udp_headers(c) != 0)) {
                    if (settings.verbose > 0)
                        fprintf(stderr, "Couldn't build response\n");
                    conn_set_state(c, conn_closing);
                    break;
                }
            }

            /* fall through... */

        case conn_mwrite:
            switch (transmit(c)) {
            case TRANSMIT_COMPLETE:
                if (c->state == conn_write) {
                    /* Release the response buffer, if one was handed
                       over, then move on to the queued follow-up state. */
                    if (c->write_and_free) {
                        free(c->write_and_free);
                        c->write_and_free = 0;
                    }
                    conn_set_state(c, c->write_and_go);
                } else {
                    /* Completing a transmit in any state other than
                       conn_write is treated as an error here. */
                    if (settings.verbose > 0)
                        fprintf(stderr, "Unexpected state %d\n", c->state);
                    conn_set_state(c, conn_closing);
                }
                break;

            case TRANSMIT_INCOMPLETE:
            case TRANSMIT_HARD_ERROR:
                break;                   /* Continue in state machine. */

            case TRANSMIT_SOFT_ERROR:
                /* Would block: wait for the next event. */
                stop = true;
                break;
            }
            break;

        case conn_closing:
            /* UDP connections are only cleaned up; others are closed. */
            if (c->udp)
                conn_cleanup(c);
            else
                conn_close(c);
            stop = true;
            break;
        }
    }

    return;
}
1364
+
1365
+ void event_handler(const int fd, const short which, void *arg) {
1366
+ conn *c;
1367
+
1368
+ c = (conn *)arg;
1369
+ assert(c != NULL);
1370
+
1371
+ c->which = which;
1372
+
1373
+ /* sanity */
1374
+ if (fd != c->sfd) {
1375
+ if (settings.verbose > 0)
1376
+ fprintf(stderr, "Catastrophic: event fd doesn't match conn fd!\n");
1377
+ conn_close(c);
1378
+ return;
1379
+ }
1380
+
1381
+ drive_machine(c);
1382
+
1383
+ /* wait for next event */
1384
+ return;
1385
+ }
1386
+
1387
+ static int new_socket(struct addrinfo *ai) {
1388
+ int sfd;
1389
+ int flags;
1390
+
1391
+ if ((sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) == -1) {
1392
+ perror("socket()");
1393
+ return -1;
1394
+ }
1395
+
1396
+ if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 ||
1397
+ fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
1398
+ perror("setting O_NONBLOCK");
1399
+ close(sfd);
1400
+ return -1;
1401
+ }
1402
+ return sfd;
1403
+ }
1404
+
1405
+
1406
+ /*
1407
+ * Sets a socket's send buffer size to the maximum allowed by the system.
1408
+ */
1409
+ static void maximize_sndbuf(const int sfd) {
1410
+ socklen_t intsize = sizeof(int);
1411
+ int last_good = 0;
1412
+ int min, max, avg;
1413
+ int old_size;
1414
+
1415
+ /* Start with the default size. */
1416
+ if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, &old_size, &intsize) != 0) {
1417
+ if (settings.verbose > 0)
1418
+ perror("getsockopt(SO_SNDBUF)");
1419
+ return;
1420
+ }
1421
+
1422
+ /* Binary-search for the real maximum. */
1423
+ min = old_size;
1424
+ max = MAX_SENDBUF_SIZE;
1425
+
1426
+ while (min <= max) {
1427
+ avg = ((unsigned int)(min + max)) / 2;
1428
+ if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
1429
+ last_good = avg;
1430
+ min = avg + 1;
1431
+ } else {
1432
+ max = avg - 1;
1433
+ }
1434
+ }
1435
+
1436
+ if (settings.verbose > 1)
1437
+ fprintf(stderr, "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
1438
+ }
1439
+
1440
/*
 * Create, bind and register the server sockets for one port.
 *
 * Resolves settings.inter / port with getaddrinfo() and, for every
 * returned address, creates a non-blocking socket, sets its options and
 * binds it.  Addresses that are already in use are skipped.  TCP sockets
 * are put into listening state and linked into listen_conn; a UDP socket
 * is dispatched once per worker thread.
 *
 * port:   port number to bind
 * is_udp: true for a UDP socket, false for TCP
 *
 * Returns 0 if at least one socket was bound, 1 on any failure.
 */
static int server_socket(const int port, const bool is_udp) {
    struct linger ling = {0, 0};
    struct addrinfo hints;
    struct addrinfo *ai;
    struct addrinfo *cur;
    char port_buf[NI_MAXSERV];
    int flags = 1;
    int bound = 0;
    int error;
    int sfd;

    /*
     * the memset call clears nonstandard fields in some implementations
     * that otherwise mess things up.
     */
    memset(&hints, 0, sizeof (hints));
    hints.ai_flags = AI_PASSIVE|AI_ADDRCONFIG;
    if (!is_udp) {
        hints.ai_family = AF_UNSPEC;
        hints.ai_protocol = IPPROTO_TCP;
        hints.ai_socktype = SOCK_STREAM;
    } else {
        hints.ai_protocol = IPPROTO_UDP;
        hints.ai_socktype = SOCK_DGRAM;
        hints.ai_family = AF_INET; /* This left here because of issues with OSX 10.5 */
    }

    snprintf(port_buf, NI_MAXSERV, "%d", port);
    error = getaddrinfo(settings.inter, port_buf, &hints, &ai);
    if (error != 0) {
        if (error == EAI_SYSTEM)
            perror("getaddrinfo()");
        else
            fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));

        return 1;
    }

    for (cur = ai; cur != NULL; cur = cur->ai_next) {
        conn *listen_conn_add;

        sfd = new_socket(cur);
        if (sfd == -1) {
            freeaddrinfo(ai);
            return 1;
        }

        /* Socket options are applied on a best-effort basis. */
        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if (!is_udp) {
            setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
        } else {
            maximize_sndbuf(sfd);
        }

        if (bind(sfd, cur->ai_addr, cur->ai_addrlen) == -1) {
            /* An address that is already in use is skipped; anything
               else aborts the whole setup. */
            const int in_use = (errno == EADDRINUSE);

            if (!in_use)
                perror("bind()");
            close(sfd);
            if (in_use)
                continue;
            freeaddrinfo(ai);
            return 1;
        }

        bound++;
        if (!is_udp && listen(sfd, 1024) == -1) {
            perror("listen()");
            close(sfd);
            freeaddrinfo(ai);
            return 1;
        }

        if (!is_udp) {
            listen_conn_add = conn_new(sfd, conn_listening,
                                       EV_READ | EV_PERSIST, 1, false, main_base);
            if (!listen_conn_add) {
                fprintf(stderr, "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }

            /* Push onto the front of the listener list. */
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
        } else {
            int t;

            for (t = 0; t < settings.num_threads; t++) {
                /* this is guaranteed to hit all threads because we round-robin */
                dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, 1);
            }
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff we detected no errors in starting up connections */
    return bound == 0;
}
1541
+
1542
/*
 * Create a non-blocking AF_UNIX stream socket.
 *
 * Returns the new descriptor, or -1 (after reporting via perror) if the
 * socket cannot be created or cannot be switched to non-blocking mode.
 */
static int new_socket_unix(void) {
    int fl;
    int fd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (fd == -1) {
        perror("socket()");
        return -1;
    }

    fl = fcntl(fd, F_GETFL, 0);
    if (fl >= 0 && fcntl(fd, F_SETFL, fl | O_NONBLOCK) >= 0) {
        return fd;
    }

    perror("setting O_NONBLOCK");
    close(fd);
    return -1;
}
1559
+
1560
+ static int server_socket_unix(const char *path, int access_mask) {
1561
+ int sfd;
1562
+ struct linger ling = {0, 0};
1563
+ struct sockaddr_un addr;
1564
+ struct stat tstat;
1565
+ int flags =1;
1566
+ int old_umask;
1567
+
1568
+ if (!path) {
1569
+ return 1;
1570
+ }
1571
+
1572
+ if ((sfd = new_socket_unix()) == -1) {
1573
+ return 1;
1574
+ }
1575
+
1576
+ /*
1577
+ * Clean up a previous socket file if we left it around
1578
+ */
1579
+ if (lstat(path, &tstat) == 0) {
1580
+ if (S_ISSOCK(tstat.st_mode))
1581
+ unlink(path);
1582
+ }
1583
+
1584
+ setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
1585
+ setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
1586
+ setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
1587
+
1588
+ /*
1589
+ * the memset call clears nonstandard fields in some impementations
1590
+ * that otherwise mess things up.
1591
+ */
1592
+ memset(&addr, 0, sizeof(addr));
1593
+
1594
+ addr.sun_family = AF_UNIX;
1595
+ strcpy(addr.sun_path, path);
1596
+ old_umask=umask( ~(access_mask&0777));
1597
+ if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
1598
+ perror("bind()");
1599
+ close(sfd);
1600
+ umask(old_umask);
1601
+ return 1;
1602
+ }
1603
+ umask(old_umask);
1604
+ if (listen(sfd, 1024) == -1) {
1605
+ perror("listen()");
1606
+ close(sfd);
1607
+ return 1;
1608
+ }
1609
+ if (!(listen_conn = conn_new(sfd, conn_listening,
1610
+ EV_READ | EV_PERSIST, 1, false, main_base))) {
1611
+ fprintf(stderr, "failed to create listening connection\n");
1612
+ exit(EXIT_FAILURE);
1613
+ }
1614
+
1615
+ return 0;
1616
+ }
1617
+
1618
+ /*
1619
+ * We keep the current time of day in a global variable that's updated by a
1620
+ * timer event. This saves us a bunch of time() system calls (we really only
1621
+ * need to get the time once a second, whereas there can be tens of thousands
1622
+ * of requests a second) and allows us to use server-start-relative timestamps
1623
+ * rather than absolute UNIX timestamps, a space savings on systems where
1624
+ * sizeof(time_t) > sizeof(unsigned int).
1625
+ */
1626
+ volatile rel_time_t current_time;
1627
+ static struct event clockevent;
1628
+
1629
+ /* time-sensitive callers can call it by hand with this, outside the normal ever-1-second timer */
1630
+ static void set_current_time(void) {
1631
+ struct timeval timer;
1632
+
1633
+ gettimeofday(&timer, NULL);
1634
+ current_time = (rel_time_t) (timer.tv_sec - stats.started);
1635
+ }
1636
+
1637
+ static void clock_handler(const int fd, const short which, void *arg) {
1638
+ struct timeval t = {.tv_sec = 1, .tv_usec = 0};
1639
+ static bool initialized = false;
1640
+
1641
+ if (initialized) {
1642
+ /* only delete the event if it's actually there. */
1643
+ evtimer_del(&clockevent);
1644
+ } else {
1645
+ initialized = true;
1646
+ }
1647
+
1648
+ evtimer_set(&clockevent, clock_handler, 0);
1649
+ event_base_set(main_base, &clockevent);
1650
+ evtimer_add(&clockevent, &t);
1651
+
1652
+ set_current_time();
1653
+ }
1654
+
1655
+ static void usage(void) {
1656
+ printf(PACKAGE " " VERSION "\n");
1657
+ printf("-p <num> TCP port number to listen on (default: 11211)\n"
1658
+ "-U <num> UDP port number to listen on (default: 0, off)\n"
1659
+ "-s <file> unix socket path to listen on (disables network support)\n"
1660
+ "-a <mask> access mask for unix socket, in octal (default 0700)\n"
1661
+ "-l <ip_addr> interface to listen on, default is INDRR_ANY\n"
1662
+ "-d run as a daemon\n"
1663
+ "-r maximize core file limit\n"
1664
+ "-u <username> assume identity of <username> (only when run as root)\n"
1665
+ "-m <num> max memory to use for items in megabytes, default is 64 MB\n"
1666
+ "-M return error on memory exhausted (rather than removing items)\n"
1667
+ "-c <num> max simultaneous connections, default is 1024\n"
1668
+ "-k lock down all paged memory. Note that there is a\n"
1669
+ " limit on how much memory you may lock. Trying to\n"
1670
+ " allocate more than that would fail, so be sure you\n"
1671
+ " set the limit correctly for the user you started\n"
1672
+ " the daemon with (not for -u <username> user;\n"
1673
+ " under sh this is done with 'ulimit -S -l NUM_KB').\n"
1674
+ "-v verbose (print errors/warnings while in event loop)\n"
1675
+ "-vv very verbose (also print client commands/reponses)\n"
1676
+ "-h print this help and exit\n"
1677
+ "-i print memcached and libevent license\n"
1678
+ "-b run a managed instanced (mnemonic: buckets)\n"
1679
+ "-P <file> save PID in <file>, only used with -d option\n"
1680
+ "-f <factor> chunk size growth factor, default 1.25\n"
1681
+ "-n <bytes> minimum space allocated for key+value+flags, default 48\n"
1682
+
1683
+ #if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
1684
+ "-L Try to use large memory pages (if available). Increasing\n"
1685
+ " the memory page size could reduce the number of TLB misses\n"
1686
+ " and improve the performance. In order to get large pages\n"
1687
+ " from the OS, memcached will allocate the total item-cache\n"
1688
+ " in one large chunk.\n"
1689
+ #endif
1690
+ );
1691
+
1692
+ #ifdef USE_THREADS
1693
+ printf("-t <num> number of threads to use, default 4\n");
1694
+ #endif
1695
+ return;
1696
+ }
1697
+
1698
+ static void usage_license(void) {
1699
+ printf(PACKAGE " " VERSION "\n\n");
1700
+ printf(
1701
+ "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
1702
+ "All rights reserved.\n"
1703
+ "\n"
1704
+ "Redistribution and use in source and binary forms, with or without\n"
1705
+ "modification, are permitted provided that the following conditions are\n"
1706
+ "met:\n"
1707
+ "\n"
1708
+ " * Redistributions of source code must retain the above copyright\n"
1709
+ "notice, this list of conditions and the following disclaimer.\n"
1710
+ "\n"
1711
+ " * Redistributions in binary form must reproduce the above\n"
1712
+ "copyright notice, this list of conditions and the following disclaimer\n"
1713
+ "in the documentation and/or other materials provided with the\n"
1714
+ "distribution.\n"
1715
+ "\n"
1716
+ " * Neither the name of the Danga Interactive nor the names of its\n"
1717
+ "contributors may be used to endorse or promote products derived from\n"
1718
+ "this software without specific prior written permission.\n"
1719
+ "\n"
1720
+ "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
1721
+ "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
1722
+ "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
1723
+ "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
1724
+ "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
1725
+ "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
1726
+ "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
1727
+ "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
1728
+ "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
1729
+ "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
1730
+ "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
1731
+ "\n"
1732
+ "\n"
1733
+ "This product includes software developed by Niels Provos.\n"
1734
+ "\n"
1735
+ "[ libevent ]\n"
1736
+ "\n"
1737
+ "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
1738
+ "All rights reserved.\n"
1739
+ "\n"
1740
+ "Redistribution and use in source and binary forms, with or without\n"
1741
+ "modification, are permitted provided that the following conditions\n"
1742
+ "are met:\n"
1743
+ "1. Redistributions of source code must retain the above copyright\n"
1744
+ " notice, this list of conditions and the following disclaimer.\n"
1745
+ "2. Redistributions in binary form must reproduce the above copyright\n"
1746
+ " notice, this list of conditions and the following disclaimer in the\n"
1747
+ " documentation and/or other materials provided with the distribution.\n"
1748
+ "3. All advertising materials mentioning features or use of this software\n"
1749
+ " must display the following acknowledgement:\n"
1750
+ " This product includes software developed by Niels Provos.\n"
1751
+ "4. The name of the author may not be used to endorse or promote products\n"
1752
+ " derived from this software without specific prior written permission.\n"
1753
+ "\n"
1754
+ "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
1755
+ "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
1756
+ "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
1757
+ "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
1758
+ "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
1759
+ "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
1760
+ "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
1761
+ "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
1762
+ "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
1763
+ "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
1764
+ );
1765
+
1766
+ return;
1767
+ }
1768
+
1769
/*
 * Write `pid` followed by a newline to the file `pid_file`.
 *
 * Does nothing when pid_file is NULL.  Failures to open or close the
 * file are reported on stderr; they are not fatal.
 */
static void save_pid(const pid_t pid, const char *pid_file) {
    FILE *out;

    if (pid_file == NULL) {
        return;
    }

    out = fopen(pid_file, "w");
    if (out == NULL) {
        fprintf(stderr, "Could not open the pid file %s for writing\n", pid_file);
        return;
    }

    fprintf(out, "%ld\n", (long)pid);
    if (fclose(out) == -1) {
        fprintf(stderr, "Could not close the pid file %s.\n", pid_file);
    }
}
1785
+
1786
/*
 * Delete the PID file `pid_file`.
 *
 * Does nothing when pid_file is NULL; a failed unlink() is reported on
 * stderr.
 */
static void remove_pidfile(const char *pid_file) {
    if (pid_file != NULL && unlink(pid_file) != 0) {
        fprintf(stderr, "Could not remove the pid file %s.\n", pid_file);
    }
}
1795
+
1796
+
1797
/*
 * Signal handler installed for SIGINT by main(): announces the signal on
 * stdout and terminates the process with a success status.
 */
static void sig_handler(const int sig) {
    fputs("SIGINT handled.\n", stdout);
    exit(EXIT_SUCCESS);
}
1801
+
1802
#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
/*
 * On systems that support multiple page sizes we may reduce the
 * number of TLB-misses by using the biggest available page size.
 *
 * Queries the supported page sizes and advises the system, through
 * memcntl(MC_HAT_ADVISE) with MHA_MAPSIZE_BSSBRK, to use the largest one.
 *
 * Returns 0 on success, -1 if the page sizes could not be queried or the
 * advice was rejected (the default page size then stays in effect).
 */
int enable_large_pages(void) {
    size_t sizes[32];
    size_t largest;
    struct memcntl_mha arg = {0};
    int count;
    int i;

    count = getpagesizes(sizes, 32);
    if (count == -1) {
        fprintf(stderr, "Failed to get supported pagesizes: %s\n",
                strerror(errno));
        fprintf(stderr, "Will use default page size\n");
        return -1;
    }

    /* Pick the biggest of the reported sizes. */
    largest = sizes[0];
    for (i = 1; i < count; ++i) {
        if (sizes[i] > largest) {
            largest = sizes[i];
        }
    }

    arg.mha_flags = 0;
    arg.mha_pagesize = largest;
    arg.mha_cmd = MHA_MAPSIZE_BSSBRK;

    if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
        fprintf(stderr, "Failed to set large pages: %s\n",
                strerror(errno));
        fprintf(stderr, "Will use default page size\n");
        return -1;
    }

    return 0;
}
#endif
1842
+
1843
+ int main (int argc, char **argv) {
1844
+ int c;
1845
+ int x;
1846
+ bool lock_memory = false;
1847
+ bool daemonize = false;
1848
+ bool preallocate = false;
1849
+ int maxcore = 0;
1850
+ char *username = NULL;
1851
+ char *pid_file = NULL;
1852
+ struct passwd *pw;
1853
+ struct sigaction sa;
1854
+ struct rlimit rlim;
1855
+ /* listening socket */
1856
+ static int *l_socket = NULL;
1857
+
1858
+ /* udp socket */
1859
+ static int *u_socket = NULL;
1860
+ static int u_socket_count = 0;
1861
+
1862
+ /* handle SIGINT */
1863
+ signal(SIGINT, sig_handler);
1864
+
1865
+ /* init settings */
1866
+ settings_init();
1867
+
1868
+ /* set stderr non-buffering (for running under, say, daemontools) */
1869
+ setbuf(stderr, NULL);
1870
+
1871
+ /* process arguments */
1872
+ while ((c = getopt(argc, argv, "x:o:a:bp:s:U:m:Mc:khirvdl:u:P:f:s:n:t:L")) != -1) {
1873
+ switch (c) {
1874
+ case 'x':
1875
+ settings.srilm = optarg;
1876
+ break;
1877
+ case 'o':
1878
+ settings.srilm_order = atoi(optarg);
1879
+ break;
1880
+ case 'a':
1881
+ /* access for unix domain socket, as octal mask (like chmod)*/
1882
+ settings.access= strtol(optarg,NULL,8);
1883
+ break;
1884
+
1885
+ case 'U':
1886
+ settings.udpport = atoi(optarg);
1887
+ break;
1888
+ case 'b':
1889
+ settings.managed = true;
1890
+ break;
1891
+ case 'p':
1892
+ settings.port = atoi(optarg);
1893
+ break;
1894
+ case 's':
1895
+ settings.socketpath = optarg;
1896
+ break;
1897
+ case 'm':
1898
+ settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
1899
+ break;
1900
+ case 'M':
1901
+ settings.evict_to_free = 0;
1902
+ break;
1903
+ case 'c':
1904
+ settings.maxconns = atoi(optarg);
1905
+ break;
1906
+ case 'h':
1907
+ usage();
1908
+ exit(EXIT_SUCCESS);
1909
+ case 'i':
1910
+ usage_license();
1911
+ exit(EXIT_SUCCESS);
1912
+ case 'k':
1913
+ lock_memory = true;
1914
+ break;
1915
+ case 'v':
1916
+ settings.verbose++;
1917
+ break;
1918
+ case 'l':
1919
+ settings.inter= strdup(optarg);
1920
+ break;
1921
+ case 'd':
1922
+ daemonize = true;
1923
+ break;
1924
+ case 'r':
1925
+ maxcore = 1;
1926
+ break;
1927
+ case 'u':
1928
+ username = optarg;
1929
+ break;
1930
+ case 'P':
1931
+ pid_file = optarg;
1932
+ break;
1933
+ case 'f':
1934
+ settings.factor = atof(optarg);
1935
+ if (settings.factor <= 1.0) {
1936
+ fprintf(stderr, "Factor must be greater than 1\n");
1937
+ return 1;
1938
+ }
1939
+ break;
1940
+ case 'n':
1941
+ settings.chunk_size = atoi(optarg);
1942
+ if (settings.chunk_size == 0) {
1943
+ fprintf(stderr, "Chunk size must be greater than 0\n");
1944
+ return 1;
1945
+ }
1946
+ break;
1947
+ case 't':
1948
+ settings.num_threads = atoi(optarg);
1949
+ if (settings.num_threads == 0) {
1950
+ fprintf(stderr, "Number of threads must be greater than 0\n");
1951
+ return 1;
1952
+ }
1953
+ break;
1954
+ #if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
1955
+ case 'L' :
1956
+ if (enable_large_pages() == 0) {
1957
+ preallocate = true;
1958
+ }
1959
+ break;
1960
+ #endif
1961
+ default:
1962
+ fprintf(stderr, "Illegal argument \"%c\"\n", c);
1963
+ return 1;
1964
+ }
1965
+ }
1966
+
1967
+ if (maxcore != 0) {
1968
+ struct rlimit rlim_new;
1969
+ /*
1970
+ * First try raising to infinity; if that fails, try bringing
1971
+ * the soft limit to the hard.
1972
+ */
1973
+ if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
1974
+ rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
1975
+ if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
1976
+ /* failed. try raising just to the old max */
1977
+ rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
1978
+ (void)setrlimit(RLIMIT_CORE, &rlim_new);
1979
+ }
1980
+ }
1981
+ /*
1982
+ * getrlimit again to see what we ended up with. Only fail if
1983
+ * the soft limit ends up 0, because then no core files will be
1984
+ * created at all.
1985
+ */
1986
+
1987
+ if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
1988
+ fprintf(stderr, "failed to ensure corefile creation\n");
1989
+ exit(EXIT_FAILURE);
1990
+ }
1991
+ }
1992
+
1993
+ /*
1994
+ * If needed, increase rlimits to allow as many connections
1995
+ * as needed.
1996
+ */
1997
+
1998
+ if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
1999
+ fprintf(stderr, "failed to getrlimit number of files\n");
2000
+ exit(EXIT_FAILURE);
2001
+ } else {
2002
+ int maxfiles = settings.maxconns;
2003
+ if (rlim.rlim_cur < maxfiles)
2004
+ rlim.rlim_cur = maxfiles + 3;
2005
+ if (rlim.rlim_max < rlim.rlim_cur)
2006
+ rlim.rlim_max = rlim.rlim_cur;
2007
+ if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
2008
+ fprintf(stderr, "failed to set rlimit for open files. Try running as root or requesting smaller maxconns value.\n");
2009
+ exit(EXIT_FAILURE);
2010
+ }
2011
+ }
2012
+
2013
+ /* daemonize if requested */
2014
+ /* if we want to ensure our ability to dump core, don't chdir to / */
2015
+ if (daemonize) {
2016
+ int res;
2017
+ res = daemon(maxcore, settings.verbose);
2018
+ if (res == -1) {
2019
+ fprintf(stderr, "failed to daemon() in order to daemonize\n");
2020
+ return 1;
2021
+ }
2022
+ }
2023
+
2024
+ /* lock paged memory if needed */
2025
+ if (lock_memory) {
2026
+ #ifdef HAVE_MLOCKALL
2027
+ int res = mlockall(MCL_CURRENT | MCL_FUTURE);
2028
+ if (res != 0) {
2029
+ fprintf(stderr, "warning: -k invalid, mlockall() failed: %s\n",
2030
+ strerror(errno));
2031
+ }
2032
+ #else
2033
+ fprintf(stderr, "warning: -k invalid, mlockall() not supported on this platform. proceeding without.\n");
2034
+ #endif
2035
+ }
2036
+
2037
+ /* lose root privileges if we have them */
2038
+ if (getuid() == 0 || geteuid() == 0) {
2039
+ if (username == 0 || *username == '\0') {
2040
+ fprintf(stderr, "can't run as root without the -u switch\n");
2041
+ return 1;
2042
+ }
2043
+ if ((pw = getpwnam(username)) == 0) {
2044
+ fprintf(stderr, "can't find the user %s to switch to\n", username);
2045
+ return 1;
2046
+ }
2047
+ if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
2048
+ fprintf(stderr, "failed to assume identity of user %s\n", username);
2049
+ return 1;
2050
+ }
2051
+ }
2052
+
2053
+ /* initialize main thread libevent instance */
2054
+ main_base = event_init();
2055
+
2056
+ /* initialize other stuff */
2057
+ stats_init();
2058
+ conn_init();
2059
+ if (!settings.srilm) {
2060
+ fprintf(stderr, "please specify a LM file with -x\n");
2061
+ exit(EXIT_FAILURE);
2062
+ }
2063
+ srilm_init(settings.srilm, settings.srilm_order);
2064
+
2065
+ /* managed instance? alloc and zero a bucket array */
2066
+ if (settings.managed) {
2067
+ buckets = malloc(sizeof(int) * MAX_BUCKETS);
2068
+ if (buckets == 0) {
2069
+ fprintf(stderr, "failed to allocate the bucket array");
2070
+ exit(EXIT_FAILURE);
2071
+ }
2072
+ memset(buckets, 0, sizeof(int) * MAX_BUCKETS);
2073
+ }
2074
+
2075
+ /*
2076
+ * ignore SIGPIPE signals; we can use errno==EPIPE if we
2077
+ * need that information
2078
+ */
2079
+ sa.sa_handler = SIG_IGN;
2080
+ sa.sa_flags = 0;
2081
+ if (sigemptyset(&sa.sa_mask) == -1 ||
2082
+ sigaction(SIGPIPE, &sa, 0) == -1) {
2083
+ perror("failed to ignore SIGPIPE; sigaction");
2084
+ exit(EXIT_FAILURE);
2085
+ }
2086
+ /* start up worker threads if MT mode */
2087
+ thread_init(settings.num_threads, main_base);
2088
+ /* save the PID in if we're a daemon, do this after thread_init due to
2089
+ a file descriptor handling bug somewhere in libevent */
2090
+ if (daemonize)
2091
+ save_pid(getpid(), pid_file);
2092
+ /* initialise clock event */
2093
+ clock_handler(0, 0, 0);
2094
+
2095
+ /* create unix mode sockets after dropping privileges */
2096
+ if (settings.socketpath != NULL) {
2097
+ if (server_socket_unix(settings.socketpath,settings.access)) {
2098
+ fprintf(stderr, "failed to listen\n");
2099
+ exit(EXIT_FAILURE);
2100
+ }
2101
+ }
2102
+
2103
+ /* create the listening socket, bind it, and init */
2104
+ if (settings.socketpath == NULL) {
2105
+ int udp_port;
2106
+
2107
+ if (server_socket(settings.port, 0)) {
2108
+ fprintf(stderr, "failed to listen\n");
2109
+ exit(EXIT_FAILURE);
2110
+ }
2111
+ /*
2112
+ * initialization order: first create the listening sockets
2113
+ * (may need root on low ports), then drop root if needed,
2114
+ * then daemonise if needed, then init libevent (in some cases
2115
+ * descriptors created by libevent wouldn't survive forking).
2116
+ */
2117
+ udp_port = settings.udpport ? settings.udpport : settings.port;
2118
+
2119
+ /* create the UDP listening socket and bind it */
2120
+ if (server_socket(udp_port, 1)) {
2121
+ fprintf(stderr, "failed to listen on UDP port %d\n", settings.udpport);
2122
+ exit(EXIT_FAILURE);
2123
+ }
2124
+ }
2125
+
2126
+ /* enter the event loop */
2127
+ event_base_loop(main_base, 0);
2128
+ /* remove the PID file if we're a daemon */
2129
+ if (daemonize)
2130
+ remove_pidfile(pid_file);
2131
+ /* Clean up strdup() call for bind() address */
2132
+ if (settings.inter)
2133
+ free(settings.inter);
2134
+ if (l_socket)
2135
+ free(l_socket);
2136
+ if (u_socket)
2137
+ free(u_socket);
2138
+
2139
+ return 0;
2140
+ }
mosesdecoder/contrib/lmserver/missing ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/sh
2
+ # Common stub for a few missing GNU programs while installing.
3
+
4
+ scriptversion=2006-05-10.23
5
+
6
+ # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006
7
+ # Free Software Foundation, Inc.
8
+ # Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
9
+
10
+ # This program is free software; you can redistribute it and/or modify
11
+ # it under the terms of the GNU General Public License as published by
12
+ # the Free Software Foundation; either version 2, or (at your option)
13
+ # any later version.
14
+
15
+ # This program is distributed in the hope that it will be useful,
16
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
17
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
+ # GNU General Public License for more details.
19
+
20
+ # You should have received a copy of the GNU General Public License
21
+ # along with this program; if not, write to the Free Software
22
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23
+ # 02110-1301, USA.
24
+
25
+ # As a special exception to the GNU General Public License, if you
26
+ # distribute this file as part of a program that contains a
27
+ # configuration script generated by Autoconf, you may include it under
28
+ # the same distribution terms that you use for the rest of that program.
29
+
30
+ if test $# -eq 0; then
31
+ echo 1>&2 "Try \`$0 --help' for more information"
32
+ exit 1
33
+ fi
34
+
35
+ run=:
36
+ sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
37
+ sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
38
+
39
+ # In the cases where this matters, `missing' is being run in the
40
+ # srcdir already.
41
+ if test -f configure.ac; then
42
+ configure_ac=configure.ac
43
+ else
44
+ configure_ac=configure.in
45
+ fi
46
+
47
+ msg="missing on your system"
48
+
49
+ case $1 in
50
+ --run)
51
+ # Try to run requested program, and just exit if it succeeds.
52
+ run=
53
+ shift
54
+ "$@" && exit 0
55
+ # Exit code 63 means version mismatch. This often happens
56
+ # when the user tries to use an ancient version of a tool on
57
+ # a file that requires a minimum version. In this case we
58
+ # should proceed as if the program had been absent, or
59
+ # if --run hadn't been passed.
60
+ if test $? = 63; then
61
+ run=:
62
+ msg="probably too old"
63
+ fi
64
+ ;;
65
+
66
+ -h|--h|--he|--hel|--help)
67
+ echo "\
68
+ $0 [OPTION]... PROGRAM [ARGUMENT]...
69
+
70
+ Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
71
+ error status if there is no known handling for PROGRAM.
72
+
73
+ Options:
74
+ -h, --help display this help and exit
75
+ -v, --version output version information and exit
76
+ --run try to run the given command, and emulate it if it fails
77
+
78
+ Supported PROGRAM values:
79
+ aclocal touch file \`aclocal.m4'
80
+ autoconf touch file \`configure'
81
+ autoheader touch file \`config.h.in'
82
+ autom4te touch the output file, or create a stub one
83
+ automake touch all \`Makefile.in' files
84
+ bison create \`y.tab.[ch]', if possible, from existing .[ch]
85
+ flex create \`lex.yy.c', if possible, from existing .c
86
+ help2man touch the output file
87
+ lex create \`lex.yy.c', if possible, from existing .c
88
+ makeinfo touch the output file
89
+ tar try tar, gnutar, gtar, then tar without non-portable flags
90
+ yacc create \`y.tab.[ch]', if possible, from existing .[ch]
91
+
92
+ Send bug reports to <bug-automake@gnu.org>."
93
+ exit $?
94
+ ;;
95
+
96
+ -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
97
+ echo "missing $scriptversion (GNU Automake)"
98
+ exit $?
99
+ ;;
100
+
101
+ -*)
102
+ echo 1>&2 "$0: Unknown \`$1' option"
103
+ echo 1>&2 "Try \`$0 --help' for more information"
104
+ exit 1
105
+ ;;
106
+
107
+ esac
108
+
109
+ # Now exit if we have it, but it failed. Also exit now if we
110
+ # don't have it and --version was passed (most likely to detect
111
+ # the program).
112
+ case $1 in
113
+ lex|yacc)
114
+ # Not GNU programs, they don't have --version.
115
+ ;;
116
+
117
+ tar)
118
+ if test -n "$run"; then
119
+ echo 1>&2 "ERROR: \`tar' requires --run"
120
+ exit 1
121
+ elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
122
+ exit 1
123
+ fi
124
+ ;;
125
+
126
+ *)
127
+ if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
128
+ # We have it, but it failed.
129
+ exit 1
130
+ elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
131
+ # Could not run --version or --help. This is probably someone
132
+ # running `$TOOL --version' or `$TOOL --help' to check whether
133
+ # $TOOL exists and not knowing $TOOL uses missing.
134
+ exit 1
135
+ fi
136
+ ;;
137
+ esac
138
+
139
+ # If it does not exist, or fails to run (possibly an outdated version),
140
+ # try to emulate it.
141
+ case $1 in
142
+ aclocal*)
143
+ echo 1>&2 "\
144
+ WARNING: \`$1' is $msg. You should only need it if
145
+ you modified \`acinclude.m4' or \`${configure_ac}'. You might want
146
+ to install the \`Automake' and \`Perl' packages. Grab them from
147
+ any GNU archive site."
148
+ touch aclocal.m4
149
+ ;;
150
+
151
+ autoconf)
152
+ echo 1>&2 "\
153
+ WARNING: \`$1' is $msg. You should only need it if
154
+ you modified \`${configure_ac}'. You might want to install the
155
+ \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
156
+ archive site."
157
+ touch configure
158
+ ;;
159
+
160
+ autoheader)
161
+ echo 1>&2 "\
162
+ WARNING: \`$1' is $msg. You should only need it if
163
+ you modified \`acconfig.h' or \`${configure_ac}'. You might want
164
+ to install the \`Autoconf' and \`GNU m4' packages. Grab them
165
+ from any GNU archive site."
166
+ files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
167
+ test -z "$files" && files="config.h"
168
+ touch_files=
169
+ for f in $files; do
170
+ case $f in
171
+ *:*) touch_files="$touch_files "`echo "$f" |
172
+ sed -e 's/^[^:]*://' -e 's/:.*//'`;;
173
+ *) touch_files="$touch_files $f.in";;
174
+ esac
175
+ done
176
+ touch $touch_files
177
+ ;;
178
+
179
+ automake*)
180
+ echo 1>&2 "\
181
+ WARNING: \`$1' is $msg. You should only need it if
182
+ you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
183
+ You might want to install the \`Automake' and \`Perl' packages.
184
+ Grab them from any GNU archive site."
185
+ find . -type f -name Makefile.am -print |
186
+ sed 's/\.am$/.in/' |
187
+ while read f; do touch "$f"; done
188
+ ;;
189
+
190
+ autom4te)
191
+ echo 1>&2 "\
192
+ WARNING: \`$1' is needed, but is $msg.
193
+ You might have modified some files without having the
194
+ proper tools for further handling them.
195
+ You can get \`$1' as part of \`Autoconf' from any GNU
196
+ archive site."
197
+
198
+ file=`echo "$*" | sed -n "$sed_output"`
199
+ test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
200
+ if test -f "$file"; then
201
+ touch $file
202
+ else
203
+ test -z "$file" || exec >$file
204
+ echo "#! /bin/sh"
205
+ echo "# Created by GNU Automake missing as a replacement of"
206
+ echo "# $ $@"
207
+ echo "exit 0"
208
+ chmod +x $file
209
+ exit 1
210
+ fi
211
+ ;;
212
+
213
+ bison|yacc)
214
+ echo 1>&2 "\
215
+ WARNING: \`$1' $msg. You should only need it if
216
+ you modified a \`.y' file. You may need the \`Bison' package
217
+ in order for those modifications to take effect. You can get
218
+ \`Bison' from any GNU archive site."
219
+ rm -f y.tab.c y.tab.h
220
+ if test $# -ne 1; then
221
+ eval LASTARG="\${$#}"
222
+ case $LASTARG in
223
+ *.y)
224
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
225
+ if test -f "$SRCFILE"; then
226
+ cp "$SRCFILE" y.tab.c
227
+ fi
228
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
229
+ if test -f "$SRCFILE"; then
230
+ cp "$SRCFILE" y.tab.h
231
+ fi
232
+ ;;
233
+ esac
234
+ fi
235
+ if test ! -f y.tab.h; then
236
+ echo >y.tab.h
237
+ fi
238
+ if test ! -f y.tab.c; then
239
+ echo 'main() { return 0; }' >y.tab.c
240
+ fi
241
+ ;;
242
+
243
+ lex|flex)
244
+ echo 1>&2 "\
245
+ WARNING: \`$1' is $msg. You should only need it if
246
+ you modified a \`.l' file. You may need the \`Flex' package
247
+ in order for those modifications to take effect. You can get
248
+ \`Flex' from any GNU archive site."
249
+ rm -f lex.yy.c
250
+ if test $# -ne 1; then
251
+ eval LASTARG="\${$#}"
252
+ case $LASTARG in
253
+ *.l)
254
+ SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
255
+ if test -f "$SRCFILE"; then
256
+ cp "$SRCFILE" lex.yy.c
257
+ fi
258
+ ;;
259
+ esac
260
+ fi
261
+ if test ! -f lex.yy.c; then
262
+ echo 'main() { return 0; }' >lex.yy.c
263
+ fi
264
+ ;;
265
+
266
+ help2man)
267
+ echo 1>&2 "\
268
+ WARNING: \`$1' is $msg. You should only need it if
269
+ you modified a dependency of a manual page. You may need the
270
+ \`Help2man' package in order for those modifications to take
271
+ effect. You can get \`Help2man' from any GNU archive site."
272
+
273
+ file=`echo "$*" | sed -n "$sed_output"`
274
+ test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
275
+ if test -f "$file"; then
276
+ touch $file
277
+ else
278
+ test -z "$file" || exec >$file
279
+ echo ".ab help2man is required to generate this page"
280
+ exit 1
281
+ fi
282
+ ;;
283
+
284
+ makeinfo)
285
+ echo 1>&2 "\
286
+ WARNING: \`$1' is $msg. You should only need it if
287
+ you modified a \`.texi' or \`.texinfo' file, or any other file
288
+ indirectly affecting the aspect of the manual. The spurious
289
+ call might also be the consequence of using a buggy \`make' (AIX,
290
+ DU, IRIX). You might want to install the \`Texinfo' package or
291
+ the \`GNU make' package. Grab either from any GNU archive site."
292
+ # The file to touch is that specified with -o ...
293
+ file=`echo "$*" | sed -n "$sed_output"`
294
+ test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
295
+ if test -z "$file"; then
296
+ # ... or it is the one specified with @setfilename ...
297
+ infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
298
+ file=`sed -n '
299
+ /^@setfilename/{
300
+ s/.* \([^ ]*\) *$/\1/
301
+ p
302
+ q
303
+ }' $infile`
304
+ # ... or it is derived from the source name (dir/f.texi becomes f.info)
305
+ test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
306
+ fi
307
+ # If the file does not exist, the user really needs makeinfo;
308
+ # let's fail without touching anything.
309
+ test -f $file || exit 1
310
+ touch $file
311
+ ;;
312
+
313
+ tar)
314
+ shift
315
+
316
+ # We have already tried tar in the generic part.
317
+ # Look for gnutar/gtar before invocation to avoid ugly error
318
+ # messages.
319
+ if (gnutar --version > /dev/null 2>&1); then
320
+ gnutar "$@" && exit 0
321
+ fi
322
+ if (gtar --version > /dev/null 2>&1); then
323
+ gtar "$@" && exit 0
324
+ fi
325
+ firstarg="$1"
326
+ if shift; then
327
+ case $firstarg in
328
+ *o*)
329
+ firstarg=`echo "$firstarg" | sed s/o//`
330
+ tar "$firstarg" "$@" && exit 0
331
+ ;;
332
+ esac
333
+ case $firstarg in
334
+ *h*)
335
+ firstarg=`echo "$firstarg" | sed s/h//`
336
+ tar "$firstarg" "$@" && exit 0
337
+ ;;
338
+ esac
339
+ fi
340
+
341
+ echo 1>&2 "\
342
+ WARNING: I can't seem to be able to run \`tar' with the given arguments.
343
+ You may want to install GNU tar or Free paxutils, or check the
344
+ command line arguments."
345
+ exit 1
346
+ ;;
347
+
348
+ *)
349
+ echo 1>&2 "\
350
+ WARNING: \`$1' is needed, and is $msg.
351
+ You might have modified some files without having the
352
+ proper tools for further handling them. Check the \`README' file,
353
+ it often tells you about the needed prerequisites for installing
354
+ this package. You may also peek at any GNU archive site, in case
355
+ some other package would contain this missing \`$1' program."
356
+ exit 1
357
+ ;;
358
+ esac
359
+
360
+ exit 0
361
+
362
+ # Local variables:
363
+ # eval: (add-hook 'write-file-hooks 'time-stamp)
364
+ # time-stamp-start: "scriptversion="
365
+ # time-stamp-format: "%:y-%02m-%02d.%02H"
366
+ # time-stamp-end: "$"
367
+ # End:
mosesdecoder/contrib/lmserver/srilm.h ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #ifndef lmserver_srilm_h
2
+ #define lmserver_srilm_h
3
+
4
+ void srilm_init(const char* fname, int order);
5
+ int srilm_getvoc(const char* word);
6
+ float srilm_wordprob(int, int*);
7
+
8
+ #endif
mosesdecoder/contrib/tmcombine/README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tmcombine - a tool for Moses translation model combination
2
+
3
+ Author: Rico Sennrich <sennrich [AT] cl.uzh.ch>
4
+
5
+ ABOUT
6
+ -----
7
+
8
+ This program handles the combination of Moses phrase tables, either through
9
+ linear interpolation of the phrase translation probabilities/lexical weights,
10
+ or through a recomputation based on the (weighted) combined counts.
11
+
12
+ It also supports an automatic search for weights that minimize the cross-entropy
13
+ between the model and a tuning set of word/phrase alignments.
14
+
15
+
16
+ REQUIREMENTS
17
+ ------------
18
+
19
+ The script requires Python >= 2.6.
20
+ SciPy is recommended. If it is missing, an ad-hoc hill-climbing optimizer will be used (which may be slower, but is actually recommended for PyPy and/or a high number of models).
21
+ On Debian-based systems, you can install SciPy from the repository:
22
+ sudo apt-get install python-scipy
23
+
24
+
25
+ USAGE
26
+ -----
27
+
28
+ for usage information, run
29
+ ./tmcombine.py -h
30
+
31
+ Two basic command line examples:
32
+
33
+ linearly interpolate two translation models with fixed weights:
34
+ ./tmcombine.py combine_given_weights test/model1 test/model2 -w "0.1,0.9;0.1,1;0.2,0.8;0.5,0.5" -o test/phrase-table_test2
35
+
36
+ do a count-based combination of two translation models with weights that minimize perplexity on a set of reference phrase pairs.
37
+ ./tmcombine.py combine_given_tuning_set test/model1 test/model2 -o test/phrase-table_test5 -m counts -r test/extract
38
+
39
+ Typically, you have to specify one action out of the following:
40
+
41
+ - `combine_given_weights`: write a new phrase table with defined weights
42
+
43
+ - `combine_given_tuning_set`: write a new phrase table, using the weights that minimize cross-entropy on a tuning set
44
+
45
+ - `compare_cross_entropies`: print cross-entropies for each model/feature, using the intersection of phrase pairs.
46
+
47
+ - `compute_cross_entropy`: return cross-entropy for a tuning set, a set of models and a set of weights.
48
+
49
+ - `return_best_cross_entropy`: return the set of weights and cross-entropy that is optimal for a tuning set and a set of models.
50
+
51
+ You can check the docstrings of `Combine_TMs()` for more information and find some example commands in the function `test()`.
52
+ Some configuration options (i.e. normalization of linear interpolation) are not accessible from the command line.
53
+ You can gain a bit more flexibility by writing/modifying python code that initializes `Combine_TMs()` with your desired arguments, or by just fiddling with the default values in the script.
54
+
55
+ Regression tests (check if the output files (`test/phrase-table_testN`) differ from the files in the repository):
56
+ ./tmcombine.py test
57
+
58
+ FURTHER NOTES
59
+ -------------
60
+
61
+ - Different combination algorithms require different statistics. To be on the safe side, use the option `-write-lexical-counts` when training models.
62
+
63
+ - The script assumes that phrase tables are sorted (to allow incremental, more memory-friendly processing). Sort the tables with `LC_ALL=C`. Phrase tables produced by Moses are sorted correctly.
64
+
65
+ - Some configurations require additional statistics that are loaded in memory (lexical tables; complete list of target phrases).
66
+ If memory consumption is a problem, use the option --lowmem (slightly slower and writes temporary files to disk), or consider pruning your phrase table before combining (e.g. using Johnson et al. 2007).
67
+
68
+ - The script can read/write gzipped files, but the Python implementation is slow. You're better off unzipping the files on the command line and working with the unzipped files. The script will automatically search for the unzipped file first, and for the gzipped file if the former doesn't exist.
69
+
70
+ - The cross-entropy estimation assumes that phrase tables contain true probability distributions (i.e. a probability mass of 1 for each conditional probability distribution). If this is not true, the results may be skewed.
71
+
72
+ - Unknown phrase pairs are not considered for the cross-entropy estimation. A comparison of models with different vocabularies may be misleading.
73
+
74
+ - Don't directly compare cross-entropies obtained from a combination with different modes. Depending on how some corner cases are treated, linear interpolation does not distribute the full probability mass and thus shows higher (i.e. worse) cross-entropies.
75
+
76
+
77
+ REFERENCES
78
+ ----------
79
+
80
+ The algorithms are described in
81
+
82
+ Sennrich, Rico (2012). Perplexity Minimization for Translation Model Domain Adaptation in Statistical Machine Translation. In: Proceedings of EACL 2012.
83
+
84
+ The evaluated algorithms are:
85
+
86
+ - linear interpolation (naive): default
87
+ - linear interpolation (modified): use options `--normalized` and `--recompute_lexweights`
88
+ - weighted counts: use option `-m counts`
mosesdecoder/contrib/tmcombine/argparse.py ADDED
@@ -0,0 +1,2382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: Steven J. Bethard <steven.bethard@gmail.com>.
2
+
3
+ """Command-line parsing library
4
+
5
+ This module is an optparse-inspired command-line parsing library that:
6
+
7
+ - handles both optional and positional arguments
8
+ - produces highly informative usage messages
9
+ - supports parsers that dispatch to sub-parsers
10
+
11
+ The following is a simple usage example that sums integers from the
12
+ command-line and writes the result to a file::
13
+
14
+ parser = argparse.ArgumentParser(
15
+ description='sum the integers at the command line')
16
+ parser.add_argument(
17
+ 'integers', metavar='int', nargs='+', type=int,
18
+ help='an integer to be summed')
19
+ parser.add_argument(
20
+ '--log', default=sys.stdout, type=argparse.FileType('w'),
21
+ help='the file where the sum should be written')
22
+ args = parser.parse_args()
23
+ args.log.write('%s' % sum(args.integers))
24
+ args.log.close()
25
+
26
+ The module contains the following public classes:
27
+
28
+ - ArgumentParser -- The main entry point for command-line parsing. As the
29
+ example above shows, the add_argument() method is used to populate
30
+ the parser with actions for optional and positional arguments. Then
31
+ the parse_args() method is invoked to convert the args at the
32
+ command-line into an object with attributes.
33
+
34
+ - ArgumentError -- The exception raised by ArgumentParser objects when
35
+ there are errors with the parser's actions. Errors raised while
36
+ parsing the command-line are caught by ArgumentParser and emitted
37
+ as command-line messages.
38
+
39
+ - FileType -- A factory for defining types of files to be created. As the
40
+ example above shows, instances of FileType are typically passed as
41
+ the type= argument of add_argument() calls.
42
+
43
+ - Action -- The base class for parser actions. Typically actions are
44
+ selected by passing strings like 'store_true' or 'append_const' to
45
+ the action= argument of add_argument(). However, for greater
46
+ customization of ArgumentParser actions, subclasses of Action may
47
+ be defined and passed as the action= argument.
48
+
49
+ - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter,
50
+ ArgumentDefaultsHelpFormatter -- Formatter classes which
51
+ may be passed as the formatter_class= argument to the
52
+ ArgumentParser constructor. HelpFormatter is the default,
53
+ RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser
54
+ not to change the formatting for help text, and
55
+ ArgumentDefaultsHelpFormatter adds information about argument defaults
56
+ to the help.
57
+
58
+ All other classes in this module are considered implementation details.
59
+ (Also note that HelpFormatter and RawDescriptionHelpFormatter are only
60
+ considered public as object names -- the API of the formatter objects is
61
+ still considered an implementation detail.)
62
+ """
63
+
64
+ __version__ = '1.1'
65
+ __all__ = [
66
+ 'ArgumentParser',
67
+ 'ArgumentError',
68
+ 'ArgumentTypeError',
69
+ 'FileType',
70
+ 'HelpFormatter',
71
+ 'ArgumentDefaultsHelpFormatter',
72
+ 'RawDescriptionHelpFormatter',
73
+ 'RawTextHelpFormatter',
74
+ 'MetavarTypeHelpFormatter',
75
+ 'Namespace',
76
+ 'Action',
77
+ 'ONE_OR_MORE',
78
+ 'OPTIONAL',
79
+ 'PARSER',
80
+ 'REMAINDER',
81
+ 'SUPPRESS',
82
+ 'ZERO_OR_MORE',
83
+ ]
84
+
85
+
86
+ import collections as _collections
87
+ import copy as _copy
88
+ import os as _os
89
+ import re as _re
90
+ import sys as _sys
91
+ import textwrap as _textwrap
92
+
93
+ from gettext import gettext as _, ngettext
94
+
95
+
96
+ SUPPRESS = '==SUPPRESS=='
97
+
98
+ OPTIONAL = '?'
99
+ ZERO_OR_MORE = '*'
100
+ ONE_OR_MORE = '+'
101
+ PARSER = 'A...'
102
+ REMAINDER = '...'
103
+ _UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args'
104
+
105
+ # =============================
106
+ # Utility functions and classes
107
+ # =============================
108
+
109
+ class _AttributeHolder(object):
110
+ """Abstract base class that provides __repr__.
111
+
112
+ The __repr__ method returns a string in the format::
113
+ ClassName(attr=name, attr=name, ...)
114
+ The attributes are determined either by a class-level attribute,
115
+ '_kwarg_names', or by inspecting the instance __dict__.
116
+ """
117
+
118
+ def __repr__(self):
119
+ type_name = type(self).__name__
120
+ arg_strings = []
121
+ for arg in self._get_args():
122
+ arg_strings.append(repr(arg))
123
+ for name, value in self._get_kwargs():
124
+ arg_strings.append('%s=%r' % (name, value))
125
+ return '%s(%s)' % (type_name, ', '.join(arg_strings))
126
+
127
+ def _get_kwargs(self):
128
+ return sorted(self.__dict__.items())
129
+
130
+ def _get_args(self):
131
+ return []
132
+
133
+
134
+ def _ensure_value(namespace, name, value):
135
+ if getattr(namespace, name, None) is None:
136
+ setattr(namespace, name, value)
137
+ return getattr(namespace, name)
138
+
139
+
140
+ # ===============
141
+ # Formatting Help
142
+ # ===============
143
+
144
+ class HelpFormatter(object):
145
+ """Formatter for generating usage messages and argument help strings.
146
+
147
+ Only the name of this class is considered a public API. All the methods
148
+ provided by the class are considered an implementation detail.
149
+ """
150
+
151
+ def __init__(self,
152
+ prog,
153
+ indent_increment=2,
154
+ max_help_position=24,
155
+ width=None):
156
+
157
+ # default setting for width
158
+ if width is None:
159
+ try:
160
+ width = int(_os.environ['COLUMNS'])
161
+ except (KeyError, ValueError):
162
+ width = 80
163
+ width -= 2
164
+
165
+ self._prog = prog
166
+ self._indent_increment = indent_increment
167
+ self._max_help_position = max_help_position
168
+ self._width = width
169
+
170
+ self._current_indent = 0
171
+ self._level = 0
172
+ self._action_max_length = 0
173
+
174
+ self._root_section = self._Section(self, None)
175
+ self._current_section = self._root_section
176
+
177
+ self._whitespace_matcher = _re.compile(r'\s+')
178
+ self._long_break_matcher = _re.compile(r'\n\n\n+')
179
+
180
+ # ===============================
181
+ # Section and indentation methods
182
+ # ===============================
183
+ def _indent(self):
184
+ self._current_indent += self._indent_increment
185
+ self._level += 1
186
+
187
+ def _dedent(self):
188
+ self._current_indent -= self._indent_increment
189
+ assert self._current_indent >= 0, 'Indent decreased below 0.'
190
+ self._level -= 1
191
+
192
+ class _Section(object):
193
+
194
+ def __init__(self, formatter, parent, heading=None):
195
+ self.formatter = formatter
196
+ self.parent = parent
197
+ self.heading = heading
198
+ self.items = []
199
+
200
+ def format_help(self):
201
+ # format the indented section
202
+ if self.parent is not None:
203
+ self.formatter._indent()
204
+ join = self.formatter._join_parts
205
+ for func, args in self.items:
206
+ func(*args)
207
+ item_help = join([func(*args) for func, args in self.items])
208
+ if self.parent is not None:
209
+ self.formatter._dedent()
210
+
211
+ # return nothing if the section was empty
212
+ if not item_help:
213
+ return ''
214
+
215
+ # add the heading if the section was non-empty
216
+ if self.heading is not SUPPRESS and self.heading is not None:
217
+ current_indent = self.formatter._current_indent
218
+ heading = '%*s%s:\n' % (current_indent, '', self.heading)
219
+ else:
220
+ heading = ''
221
+
222
+ # join the section-initial newline, the heading and the help
223
+ return join(['\n', heading, item_help, '\n'])
224
+
225
+ def _add_item(self, func, args):
226
+ self._current_section.items.append((func, args))
227
+
228
+ # ========================
229
+ # Message building methods
230
+ # ========================
231
+ def start_section(self, heading):
232
+ self._indent()
233
+ section = self._Section(self, self._current_section, heading)
234
+ self._add_item(section.format_help, [])
235
+ self._current_section = section
236
+
237
+ def end_section(self):
238
+ self._current_section = self._current_section.parent
239
+ self._dedent()
240
+
241
+ def add_text(self, text):
242
+ if text is not SUPPRESS and text is not None:
243
+ self._add_item(self._format_text, [text])
244
+
245
+ def add_usage(self, usage, actions, groups, prefix=None):
246
+ if usage is not SUPPRESS:
247
+ args = usage, actions, groups, prefix
248
+ self._add_item(self._format_usage, args)
249
+
250
def add_argument(self, action):
    """Queue the help entry for *action* and widen the invocation column.

    Actions whose help is ``SUPPRESS`` are skipped entirely.
    """
    if action.help is SUPPRESS:
        return

    # invocation text of the action itself, followed by each sub-action
    render = self._format_action_invocation
    invocations = [render(action)]
    invocations.extend(
        render(child) for child in self._iter_indented_subactions(action))

    # remember the widest invocation seen so far (including indentation)
    widest = max([len(text) for text in invocations])
    self._action_max_length = max(self._action_max_length,
                                  widest + self._current_indent)

    # finally queue the entry itself
    self._add_item(self._format_action, [action])
267
+
268
def add_arguments(self, actions):
    """Queue help entries for every action in *actions*, in order."""
    for entry in actions:
        self.add_argument(entry)
271
+
272
+ # =======================
273
+ # Help-formatting methods
274
+ # =======================
275
def format_help(self):
    """Render the root section and normalise its blank lines.

    A falsy render is returned unchanged; otherwise matches of
    ``_long_break_matcher`` become one blank line and the text is trimmed
    to end with exactly one newline.
    """
    rendered = self._root_section.format_help()
    if not rendered:
        return rendered
    rendered = self._long_break_matcher.sub('\n\n', rendered)
    return rendered.strip('\n') + '\n'
281
+
282
def _join_parts(self, part_strings):
    """Concatenate the truthy, non-``SUPPRESS`` entries of *part_strings*."""
    kept = []
    for piece in part_strings:
        if piece and piece is not SUPPRESS:
            kept.append(piece)
    return ''.join(kept)
286
+
287
def _format_usage(self, usage, actions, groups, prefix):
    """Build the complete usage message, wrapping it when it is too wide.

    usage   -- explicit usage template (may contain ``%(prog)s``) or None
               to have the usage computed from *actions*
    actions -- the actions to describe; split here into optionals (those
               with option strings) and positionals
    groups  -- groups forwarded to ``_format_actions_usage``
    prefix  -- text placed before the usage; defaults to the translated
               ``'usage: '``

    Returns ``prefix + usage`` followed by a blank line.

    Raises AssertionError (unless asserts are disabled) when the wrapped
    form is needed and the regex split of the optional/positional usage
    text does not round-trip when re-joined with single spaces.
    """
    if prefix is None:
        prefix = _('usage: ')

    # if usage is specified, use that
    if usage is not None:
        usage = usage % dict(prog=self._prog)

    # if no optionals or positionals are available, usage is just prog
    elif usage is None and not actions:
        usage = '%(prog)s' % dict(prog=self._prog)

    # if optionals and positionals are available, calculate usage
    elif usage is None:
        prog = '%(prog)s' % dict(prog=self._prog)

        # split optionals from positionals
        optionals = []
        positionals = []
        for action in actions:
            if action.option_strings:
                optionals.append(action)
            else:
                positionals.append(action)

        # build full usage string
        format = self._format_actions_usage
        action_usage = format(optionals + positionals, groups)
        usage = ' '.join([s for s in [prog, action_usage] if s])

        # wrap the usage parts if it's too long
        text_width = self._width - self._current_indent
        if len(prefix) + len(usage) > text_width:

            # break usage into wrappable parts: a parenthesised or
            # bracketed run, or otherwise a run of non-whitespace
            part_regexp = r'\(.*?\)+|\[.*?\]+|\S+'
            opt_usage = format(optionals, groups)
            pos_usage = format(positionals, groups)
            opt_parts = _re.findall(part_regexp, opt_usage)
            pos_parts = _re.findall(part_regexp, pos_usage)
            # sanity check: the split must lose nothing but single spaces
            assert ' '.join(opt_parts) == opt_usage
            assert ' '.join(pos_parts) == pos_usage

            # helper for wrapping lines
            def get_lines(parts, indent, prefix=None):
                # Greedily packs parts into lines no wider than text_width.
                # line_len starts one short because the first part on a
                # line is not preceded by a joining space.
                lines = []
                line = []
                if prefix is not None:
                    line_len = len(prefix) - 1
                else:
                    line_len = len(indent) - 1
                for part in parts:
                    if line_len + 1 + len(part) > text_width:
                        lines.append(indent + ' '.join(line))
                        line = []
                        line_len = len(indent) - 1
                    line.append(part)
                    line_len += len(part) + 1
                if line:
                    lines.append(indent + ' '.join(line))
                # when a prefix is given the first line follows it directly,
                # so its leading indent is stripped again
                if prefix is not None:
                    lines[0] = lines[0][len(indent):]
                return lines

            # if prog is short, follow it with optionals or positionals
            if len(prefix) + len(prog) <= 0.75 * text_width:
                indent = ' ' * (len(prefix) + len(prog) + 1)
                if opt_parts:
                    lines = get_lines([prog] + opt_parts, indent, prefix)
                    lines.extend(get_lines(pos_parts, indent))
                elif pos_parts:
                    lines = get_lines([prog] + pos_parts, indent, prefix)
                else:
                    lines = [prog]

            # if prog is long, put it on its own line
            else:
                indent = ' ' * len(prefix)
                parts = opt_parts + pos_parts
                lines = get_lines(parts, indent)
                # if everything does not fit on one line, wrap optionals
                # and positionals separately
                if len(lines) > 1:
                    lines = []
                    lines.extend(get_lines(opt_parts, indent))
                    lines.extend(get_lines(pos_parts, indent))
                lines = [prog] + lines

            # join lines into usage
            usage = '\n'.join(lines)

    # prefix with 'usage:'
    return '%s%s\n\n' % (prefix, usage)
378
+
379
def _format_actions_usage(self, actions, groups):
    """Return the usage fragment describing *actions* as a single string.

    actions -- sequence of actions, in display order
    groups  -- groups whose ``_group_actions`` are wrapped in ``[...]``
               (not required) or ``(...)`` (required) with ``|`` between
               members, but only when the members appear contiguously and
               in the same order inside *actions*

    Actions whose help is ``SUPPRESS`` are left out of the result.
    """
    # find group indices and identify actions in groups
    group_actions = set()
    # maps an index into `actions` to the separator text to insert there
    inserts = {}
    for group in groups:
        try:
            start = actions.index(group._group_actions[0])
        except ValueError:
            # first member of the group is not among `actions`
            continue
        else:
            end = start + len(group._group_actions)
            if actions[start:end] == group._group_actions:
                for action in group._group_actions:
                    group_actions.add(action)
                if not group.required:
                    if start in inserts:
                        inserts[start] += ' ['
                    else:
                        inserts[start] = '['
                    inserts[end] = ']'
                else:
                    if start in inserts:
                        inserts[start] += ' ('
                    else:
                        inserts[start] = '('
                    inserts[end] = ')'
                for i in range(start + 1, end):
                    inserts[i] = '|'

    # collect all actions format strings
    parts = []
    for i, action in enumerate(actions):

        # suppressed arguments are marked with None
        # remove | separators for suppressed arguments
        if action.help is SUPPRESS:
            parts.append(None)
            if inserts.get(i) == '|':
                inserts.pop(i)
            elif inserts.get(i + 1) == '|':
                inserts.pop(i + 1)

        # produce all arg strings
        elif not action.option_strings:
            default = self._get_default_metavar_for_positional(action)
            part = self._format_args(action, default)

            # if it's in a group, strip the outer []
            if action in group_actions:
                if part[0] == '[' and part[-1] == ']':
                    part = part[1:-1]

            # add the action string to the list
            parts.append(part)

        # produce the first way to invoke the option in brackets
        else:
            option_string = action.option_strings[0]

            # if the Optional doesn't take a value, format is:
            #    -s or --long
            if action.nargs == 0:
                part = '%s' % option_string

            # if the Optional takes a value, format is:
            #    -s ARGS or --long ARGS
            else:
                default = self._get_default_metavar_for_optional(action)
                args_string = self._format_args(action, default)
                part = '%s %s' % (option_string, args_string)

            # make it look optional if it's not required or in a group
            if not action.required and action not in group_actions:
                part = '[%s]' % part

            # add the action string to the list
            parts.append(part)

    # insert things at the necessary indices
    # (highest index first so earlier insertions do not shift later ones)
    for i in sorted(inserts, reverse=True):
        parts[i:i] = [inserts[i]]

    # join all the action items with spaces
    text = ' '.join([item for item in parts if item is not None])

    # clean up separators for mutually exclusive groups
    open = r'[\[(]'
    close = r'[\])]'
    # drop the space after an opening bracket and before a closing one
    text = _re.sub(r'(%s) ' % open, r'\1', text)
    text = _re.sub(r' (%s)' % close, r'\1', text)
    # remove bracket pairs left empty (e.g. all members suppressed)
    text = _re.sub(r'%s *%s' % (open, close), r'', text)
    # parentheses around content without a '|' are unwrapped
    text = _re.sub(r'\(([^|]*)\)', r'\1', text)
    text = text.strip()

    # return the text
    return text
475
+
476
+ def _format_text(self, text):
477
+ if '%(prog)' in text:
478
+ text = text % dict(prog=self._prog)
479
+ text_width = self._width - self._current_indent
480
+ indent = ' ' * self._current_indent
481
+ return self._fill_text(text, text_width, indent) + '\n\n'
482
+
483
+ def _format_action(self, action):
484
+ # determine the required width and the entry label
485
+ help_position = min(self._action_max_length + 2,
486
+ self._max_help_position)
487
+ help_width = self._width - help_position
488
+ action_width = help_position - self._current_indent - 2
489
+ action_header = self._format_action_invocation(action)
490
+
491
+ # ho nelp; start on same line and add a final newline
492
+ if not action.help:
493
+ tup = self._current_indent, '', action_header
494
+ action_header = '%*s%s\n' % tup
495
+
496
+ # short action name; start on the same line and pad two spaces
497
+ elif len(action_header) <= action_width:
498
+ tup = self._current_indent, '', action_width, action_header
499
+ action_header = '%*s%-*s ' % tup
500
+ indent_first = 0
501
+
502
+ # long action name; start on the next line
503
+ else:
504
+ tup = self._current_indent, '', action_header
505
+ action_header = '%*s%s\n' % tup
506
+ indent_first = help_position
507
+
508
+ # collect the pieces of the action help
509
+ parts = [action_header]
510
+
511
+ # if there was help for the action, add lines of help text
512
+ if action.help:
513
+ help_text = self._expand_help(action)
514
+ help_lines = self._split_lines(help_text, help_width)
515
+ parts.append('%*s%s\n' % (indent_first, '', help_lines[0]))
516
+ for line in help_lines[1:]:
517
+ parts.append('%*s%s\n' % (help_position, '', line))
518
+
519
+ # or add a newline if the description doesn't end with one
520
+ elif not action_header.endswith('\n'):
521
+ parts.append('\n')
522
+
523
+ # if there are any sub-actions, add their help as well
524
+ for subaction in self._iter_indented_subactions(action):
525
+ parts.append(self._format_action(subaction))
526
+
527
+ # return a single string
528
+ return self._join_parts(parts)
529
+
530
+ def _format_action_invocation(self, action):
531
+ if not action.option_strings:
532
+ default = self._get_default_metavar_for_positional(action)
533
+ metavar, = self._metavar_formatter(action, default)(1)
534
+ return metavar
535
+
536
+ else:
537
+ parts = []
538
+
539
+ # if the Optional doesn't take a value, format is:
540
+ # -s, --long
541
+ if action.nargs == 0:
542
+ parts.extend(action.option_strings)
543
+
544
+ # if the Optional takes a value, format is:
545
+ # -s ARGS, --long ARGS
546
+ else:
547
+ default = self._get_default_metavar_for_optional(action)
548
+ args_string = self._format_args(action, default)
549
+ for option_string in action.option_strings:
550
+ parts.append('%s %s' % (option_string, args_string))
551
+
552
+ return ', '.join(parts)
553
+
554
+ def _metavar_formatter(self, action, default_metavar):
555
+ if action.metavar is not None:
556
+ result = action.metavar
557
+ elif action.choices is not None:
558
+ choice_strs = [str(choice) for choice in action.choices]
559
+ result = '{%s}' % ','.join(choice_strs)
560
+ else:
561
+ result = default_metavar
562
+
563
+ def format(tuple_size):
564
+ if isinstance(result, tuple):
565
+ return result
566
+ else:
567
+ return (result, ) * tuple_size
568
+ return format
569
+
570
def _format_args(self, action, default_metavar):
    """Return the argument placeholder text for *action* based on nargs.

    ``None`` gives one metavar; the OPTIONAL, ZERO_OR_MORE, ONE_OR_MORE,
    REMAINDER and PARSER markers each have a fixed layout; any other
    value is treated as a count of space-separated metavars.
    """
    get_metavar = self._metavar_formatter(action, default_metavar)
    nargs = action.nargs
    if nargs is None:
        return '%s' % get_metavar(1)
    if nargs == OPTIONAL:
        return '[%s]' % get_metavar(1)
    if nargs == ZERO_OR_MORE:
        return '[%s [%s ...]]' % get_metavar(2)
    if nargs == ONE_OR_MORE:
        return '%s [%s ...]' % get_metavar(2)
    if nargs == REMAINDER:
        return '...'
    if nargs == PARSER:
        return '%s ...' % get_metavar(1)
    # explicit count: one placeholder per expected argument
    template = ' '.join(['%s'] * nargs)
    return template % get_metavar(nargs)
588
+
589
def _expand_help(self, action):
    """Return the action's help string with ``%(name)s`` fields expanded.

    The substitution values are the action's attributes plus ``prog``.
    Attributes equal to ``SUPPRESS`` are dropped, values that have a
    ``__name__`` are replaced by that name, and ``choices`` is rendered
    as a comma-separated string.
    """
    params = dict(vars(action), prog=self._prog)
    params = dict((key, value) for key, value in params.items()
                  if value is not SUPPRESS)
    for key, value in list(params.items()):
        if hasattr(value, '__name__'):
            params[key] = value.__name__
    choices = params.get('choices')
    if choices is not None:
        params['choices'] = ', '.join([str(c) for c in choices])
    return self._get_help_string(action) % params
601
+
602
+ def _iter_indented_subactions(self, action):
603
+ try:
604
+ get_subactions = action._get_subactions
605
+ except AttributeError:
606
+ pass
607
+ else:
608
+ self._indent()
609
+ for subaction in get_subactions():
610
+ yield subaction
611
+ self._dedent()
612
+
613
+ def _split_lines(self, text, width):
614
+ text = self._whitespace_matcher.sub(' ', text).strip()
615
+ return _textwrap.wrap(text, width)
616
+
617
+ def _fill_text(self, text, width, indent):
618
+ text = self._whitespace_matcher.sub(' ', text).strip()
619
+ return _textwrap.fill(text, width, initial_indent=indent,
620
+ subsequent_indent=indent)
621
+
622
+ def _get_help_string(self, action):
623
+ return action.help
624
+
625
+ def _get_default_metavar_for_optional(self, action):
626
+ return action.dest.upper()
627
+
628
+ def _get_default_metavar_for_positional(self, action):
629
+ return action.dest
630
+
631
+
632
class RawDescriptionHelpFormatter(HelpFormatter):
    """Help message formatter which retains any formatting in descriptions.

    Only the name of this class is considered a public API. All the methods
    provided by the class are considered an implementation detail.
    """

    def _fill_text(self, text, width, indent):
        # keep the author's own line breaks; only prefix each line
        indented = [indent + line for line in text.splitlines(True)]
        return ''.join(indented)
641
+
642
+
643
class RawTextHelpFormatter(RawDescriptionHelpFormatter):
    """Help message formatter which retains formatting of all help text.

    Only the name of this class is considered a public API. All the methods
    provided by the class are considered an implementation detail.
    """

    def _split_lines(self, text, width):
        # no re-wrapping: the help text's own line breaks are used as-is
        lines = text.splitlines()
        return lines
652
+
653
+
654
class ArgumentDefaultsHelpFormatter(HelpFormatter):
    """Help message formatter which adds default values to argument help.

    Only the name of this class is considered a public API. All the methods
    provided by the class are considered an implementation detail.
    """

    def _get_help_string(self, action):
        text = action.help
        # the author already mentions the default explicitly
        if '%(default)' in action.help:
            return text
        if action.default is SUPPRESS:
            return text
        # only optionals, and positionals that may be omitted, have a
        # default that can actually take effect
        if action.option_strings or action.nargs in (OPTIONAL, ZERO_OR_MORE):
            text += ' (default: %(default)s)'
        return text
669
+
670
+
671
class MetavarTypeHelpFormatter(HelpFormatter):
    """Help message formatter which uses the argument 'type' as the default
    metavar value (instead of the argument 'dest')

    Only the name of this class is considered a public API. All the methods
    provided by the class are considered an implementation detail.
    """

    def _get_default_metavar_for_optional(self, action):
        converter = action.type
        return converter.__name__

    def _get_default_metavar_for_positional(self, action):
        converter = action.type
        return converter.__name__
684
+
685
+
686
+
687
+ # =====================
688
+ # Options and Arguments
689
+ # =====================
690
+
691
def _get_action_name(argument):
    """Return a display name for *argument*, or None if there is none.

    Preference order: the option strings joined with '/', then the
    metavar, then the dest; metavar and dest are skipped when they are
    None or SUPPRESS.
    """
    if argument is None:
        return None
    if argument.option_strings:
        return '/'.join(argument.option_strings)
    for candidate in (argument.metavar, argument.dest):
        if candidate not in (None, SUPPRESS):
            return candidate
    return None
702
+
703
+
704
class ArgumentError(Exception):
    """An error from creating or using an argument (optional or positional).

    The string value of this exception is the message, augmented with
    information about the argument that caused it.
    """

    def __init__(self, argument, message):
        self.argument_name = _get_action_name(argument)
        self.message = message

    def __str__(self):
        fields = dict(message=self.message,
                      argument_name=self.argument_name)
        # without a nameable argument only the bare message is shown
        if self.argument_name is None:
            return '%(message)s' % fields
        return 'argument %(argument_name)s: %(message)s' % fields
722
+
723
+
724
class ArgumentTypeError(Exception):
    """An error from trying to convert a command line string to a type."""
727
+
728
+
729
+ # ==============
730
+ # Action classes
731
+ # ==============
732
+
733
class Action(_AttributeHolder):
    """Information about how to convert command line strings to Python objects.

    Action objects are used by an ArgumentParser to represent the information
    needed to parse a single argument from one or more strings from the
    command line. The keyword arguments to the Action constructor are also
    all attributes of Action instances.

    Keyword Arguments:

        - option_strings -- A list of command-line option strings which
            should be associated with this action.

        - dest -- The name of the attribute to hold the created object(s)

        - nargs -- The number of command-line arguments that should be
            consumed. By default, one argument will be consumed and a single
            value will be produced.  Other values include:
                - N (an integer) consumes N arguments (and produces a list)
                - '?' consumes zero or one arguments
                - '*' consumes zero or more arguments (and produces a list)
                - '+' consumes one or more arguments (and produces a list)
            Note that the difference between the default and nargs=1 is that
            with the default, a single value will be produced, while with
            nargs=1, a list containing a single value will be produced.

        - const -- The value to be produced if the option is specified and the
            option uses an action that takes no values.

        - default -- The value to be produced if the option is not specified.

        - type -- The type which the command-line arguments should be converted
            to, should be one of 'string', 'int', 'float', 'complex' or a
            callable object that accepts a single string argument. If None,
            'string' is assumed.

        - choices -- A container of values that should be allowed. If not None,
            after a command-line argument has been converted to the appropriate
            type, an exception will be raised if it is not a member of this
            collection.

        - required -- True if the action must always be specified at the
            command line. This is only meaningful for optional command-line
            arguments.

        - help -- The help string describing the argument.

        - metavar -- The name to be used for the option's argument with the
            help string. If None, the 'dest' value will be used as the name.
    """

    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 const=None,
                 default=None,
                 type=None,
                 choices=None,
                 required=False,
                 help=None,
                 metavar=None):
        self.option_strings = option_strings
        self.dest = dest
        self.nargs = nargs
        self.const = const
        self.default = default
        self.type = type
        self.choices = choices
        self.required = required
        self.help = help
        self.metavar = metavar

    def _get_kwargs(self):
        """Return ``(name, value)`` pairs for every constructor argument."""
        # 'required' is listed so the pairs cover all constructor keywords
        names = [
            'option_strings',
            'dest',
            'nargs',
            'const',
            'default',
            'type',
            'choices',
            'required',
            'help',
            'metavar',
        ]
        return [(name, getattr(self, name)) for name in names]

    def __call__(self, parser, namespace, values, option_string=None):
        """Apply the action; concrete subclasses must override this."""
        raise NotImplementedError(_('.__call__() not defined'))
822
+
823
+
824
class _StoreAction(Action):
    """Action that stores the parsed value(s) on the namespace."""

    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 const=None,
                 default=None,
                 type=None,
                 choices=None,
                 required=False,
                 help=None,
                 metavar=None):
        # a store action must consume at least one argument
        if nargs == 0:
            raise ValueError('nargs for store actions must be > 0; if you '
                             'have nothing to store, actions such as store '
                             'true or store const may be more appropriate')
        # const is only accepted together with nargs == OPTIONAL
        if nargs != OPTIONAL and const is not None:
            raise ValueError('nargs must be %r to supply const' % OPTIONAL)
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=nargs,
            const=const,
            default=default,
            type=type,
            choices=choices,
            required=required,
            help=help,
            metavar=metavar)
        super().__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
857
+
858
+
859
class _StoreConstAction(Action):
    """Action that stores a fixed ``const`` value and consumes no arguments."""

    def __init__(self,
                 option_strings,
                 dest,
                 const,
                 default=None,
                 required=False,
                 help=None,
                 metavar=None):
        # metavar is forwarded (as _AppendConstAction does) instead of
        # being accepted and silently discarded
        super(_StoreConstAction, self).__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            const=const,
            default=default,
            required=required,
            help=help,
            metavar=metavar)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, self.const)
880
+
881
+
882
class _StoreTrueAction(_StoreConstAction):
    """Store-const action whose constant is True (default False)."""

    def __init__(self,
                 option_strings,
                 dest,
                 default=False,
                 required=False,
                 help=None):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            const=True,
            default=default,
            required=required,
            help=help)
        super().__init__(**settings)
897
+
898
+
899
class _StoreFalseAction(_StoreConstAction):
    """Store-const action whose constant is False (default True)."""

    def __init__(self,
                 option_strings,
                 dest,
                 default=True,
                 required=False,
                 help=None):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            const=False,
            default=default,
            required=required,
            help=help)
        super().__init__(**settings)
914
+
915
+
916
class _AppendAction(Action):
    """Action that appends each parsed value to a list on the namespace."""

    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 const=None,
                 default=None,
                 type=None,
                 choices=None,
                 required=False,
                 help=None,
                 metavar=None):
        # an append action must consume at least one argument
        if nargs == 0:
            raise ValueError('nargs for append actions must be > 0; if arg '
                             'strings are not supplying the value to append, '
                             'the append const action may be more appropriate')
        # const is only accepted together with nargs == OPTIONAL
        if nargs != OPTIONAL and const is not None:
            raise ValueError('nargs must be %r to supply const' % OPTIONAL)
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=nargs,
            const=const,
            default=default,
            type=type,
            choices=choices,
            required=required,
            help=help,
            metavar=metavar)
        super().__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        # work on a copy so the list already on the namespace is not mutated
        existing = _ensure_value(namespace, self.dest, [])
        collected = _copy.copy(existing)
        collected.append(values)
        setattr(namespace, self.dest, collected)
951
+
952
+
953
class _AppendConstAction(Action):
    """Action that appends a fixed ``const`` to a list on the namespace."""

    def __init__(self,
                 option_strings,
                 dest,
                 const,
                 default=None,
                 required=False,
                 help=None,
                 metavar=None):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            const=const,
            default=default,
            required=required,
            help=help,
            metavar=metavar)
        super().__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        # work on a copy so the list already on the namespace is not mutated
        existing = _ensure_value(namespace, self.dest, [])
        collected = _copy.copy(existing)
        collected.append(self.const)
        setattr(namespace, self.dest, collected)
977
+
978
+
979
class _CountAction(Action):
    """Action that counts how many times the option was given."""

    def __init__(self,
                 option_strings,
                 dest,
                 default=None,
                 required=False,
                 help=None):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=default,
            required=required,
            help=help)
        super().__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        previous = _ensure_value(namespace, self.dest, 0)
        setattr(namespace, self.dest, previous + 1)
998
+
999
+
1000
class _HelpAction(Action):
    """Action that prints the parser's help and then exits."""

    def __init__(self,
                 option_strings,
                 dest=SUPPRESS,
                 default=SUPPRESS,
                 help=None):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            default=default,
            nargs=0,
            help=help)
        super().__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        parser.print_help()
        parser.exit()
1017
+
1018
+
1019
class _VersionAction(Action):
    """Action that prints version information and then exits."""

    def __init__(self,
                 option_strings,
                 version=None,
                 dest=SUPPRESS,
                 default=SUPPRESS,
                 help="show program's version number and exit"):
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            default=default,
            nargs=0,
            help=help)
        super().__init__(**settings)
        self.version = version

    def __call__(self, parser, namespace, values, option_string=None):
        # fall back to the parser-level version when none was given here
        text = self.version
        if text is None:
            text = parser.version
        formatter = parser._get_formatter()
        formatter.add_text(text)
        parser.exit(message=formatter.format_help())
1042
+
1043
+
1044
class _SubParsersAction(Action):
    """Action that dispatches the remaining arguments to a named sub-parser."""

    class _ChoicesPseudoAction(Action):
        """Placeholder action that carries one sub-command's help text."""

        def __init__(self, name, aliases, help):
            label = name
            if aliases:
                label = label + ' (%s)' % ', '.join(aliases)
            super().__init__(option_strings=[], dest=name, help=help,
                             metavar=label)

    def __init__(self,
                 option_strings,
                 prog,
                 parser_class,
                 dest=SUPPRESS,
                 help=None,
                 metavar=None):

        self._prog_prefix = prog
        self._parser_class = parser_class
        self._name_parser_map = _collections.OrderedDict()
        self._choices_actions = []

        # the name -> parser map doubles as the set of valid choices
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=PARSER,
            choices=self._name_parser_map,
            help=help,
            metavar=metavar)
        super().__init__(**settings)

    def add_parser(self, name, **kwargs):
        """Create a sub-parser registered under *name* and return it.

        ``aliases`` and ``help`` are consumed here; every other keyword is
        passed to the parser class.
        """
        # set prog from the existing prefix
        if kwargs.get('prog') is None:
            kwargs['prog'] = '%s %s' % (self._prog_prefix, name)

        aliases = kwargs.pop('aliases', ())

        # create a pseudo-action to hold the choice help
        if 'help' in kwargs:
            choice_help = kwargs.pop('help')
            pseudo = self._ChoicesPseudoAction(name, aliases, choice_help)
            self._choices_actions.append(pseudo)

        # create the parser and add it to the map
        subparser = self._parser_class(**kwargs)
        registry = self._name_parser_map
        registry[name] = subparser

        # make parser available under aliases also
        for alias in aliases:
            registry[alias] = subparser

        return subparser

    def _get_subactions(self):
        return self._choices_actions

    def __call__(self, parser, namespace, values, option_string=None):
        parser_name, arg_strings = values[0], values[1:]

        # set the parser name if requested
        if self.dest is not SUPPRESS:
            setattr(namespace, self.dest, parser_name)

        # select the parser
        try:
            parser = self._name_parser_map[parser_name]
        except KeyError:
            args = {'parser_name': parser_name,
                    'choices': ', '.join(self._name_parser_map)}
            msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args
            raise ArgumentError(self, msg)

        # parse all the remaining options into the namespace
        # store any unrecognized options on the object, so that the top
        # level parser can decide what to do with them
        namespace, leftovers = parser.parse_known_args(arg_strings, namespace)
        if leftovers:
            vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, [])
            getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(leftovers)
1127
+
1128
+
1129
+ # ==============
1130
+ # Type classes
1131
+ # ==============
1132
+
1133
class FileType(object):
    """Factory for creating file object types

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Open *string* with the configured mode and return the file object.

        The special name ``-`` selects a standard stream: stdin for modes
        containing ``r``, stdout for modes containing ``w``, ``a`` or ``x``.
        For binary modes the stream's underlying ``buffer`` is returned when
        the stream has one.

        Raises ValueError for ``-`` with a mode that names neither direction,
        and ArgumentTypeError when the file cannot be opened.
        """
        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                stream = _sys.stdin
            elif any(flag in self._mode for flag in 'wax'):
                # append and exclusive-create are also output modes
                stream = _sys.stdout
            else:
                msg = _('argument "-" with mode %r') % self._mode
                raise ValueError(msg)
            # binary modes need the byte-oriented stream underneath
            if 'b' in self._mode:
                return getattr(stream, 'buffer', stream)
            return stream

        # all other arguments are used as file names
        try:
            return open(string, self._mode, self._bufsize)
        except IOError as e:
            message = _("can't open '%s': %s")
            raise ArgumentTypeError(message % (string, e))

    def __repr__(self):
        # arguments equal to -1 (the bufsize default) are left out
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)
1172
+
1173
+ # ===========================
1174
+ # Optional and Positional Parsing
1175
+ # ===========================
1176
+
1177
class Namespace(_AttributeHolder):
    """Simple object for storing attributes.

    Implements equality by attribute names and values, and provides a simple
    string representation.
    """

    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of calling
        # vars() on it, which raises TypeError for objects without __dict__.
        if not isinstance(other, Namespace):
            return NotImplemented
        return vars(self) == vars(other)

    def __ne__(self, other):
        return not (self == other)

    def __contains__(self, key):
        return key in self.__dict__
1196
+
1197
+
1198
+ class _ActionsContainer(object):
1199
+
1200
def __init__(self,
             description,
             prefix_chars,
             argument_default,
             conflict_handler):
    """Initialise the shared state of an argument container.

    Stores the given settings, registers the built-in action classes and
    creates empty storage for actions, groups and defaults.
    """
    super(_ActionsContainer, self).__init__()

    self.description = description
    self.argument_default = argument_default
    self.prefix_chars = prefix_chars
    self.conflict_handler = conflict_handler

    # set up registries
    self._registries = {}

    # register actions
    builtin_actions = (
        (None, _StoreAction),
        ('store', _StoreAction),
        ('store_const', _StoreConstAction),
        ('store_true', _StoreTrueAction),
        ('store_false', _StoreFalseAction),
        ('append', _AppendAction),
        ('append_const', _AppendConstAction),
        ('count', _CountAction),
        ('help', _HelpAction),
        ('version', _VersionAction),
        ('parsers', _SubParsersAction),
    )
    for action_name, action_class in builtin_actions:
        self.register('action', action_name, action_class)

    # raise an exception if the conflict handler is invalid
    self._get_handler()

    # action storage
    self._actions = []
    self._option_string_actions = {}

    # groups
    self._action_groups = []
    self._mutually_exclusive_groups = []

    # defaults storage
    self._defaults = {}

    # determines whether an "option" looks like a negative number
    self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$')

    # whether or not there are any optionals that look like negative
    # numbers -- uses a list so it can be shared and edited
    self._has_negative_number_optionals = []
1248
+
1249
+ # ====================
1250
+ # Registration methods
1251
+ # ====================
1252
def register(self, registry_name, value, object):
    """Store *object* under *value* in the named registry, creating it if needed."""
    self._registries.setdefault(registry_name, {})[value] = object
1255
+
1256
+ def _registry_get(self, registry_name, value, default=None):
1257
+ return self._registries[registry_name].get(value, default)
1258
+
1259
+ # ==================================
1260
+ # Namespace default accessor methods
1261
+ # ==================================
1262
def set_defaults(self, **kwargs):
    """Record container-level defaults and apply them to existing actions."""
    self._defaults.update(kwargs)

    # actions already added whose dest is named here get the new default
    for existing in self._actions:
        dest = existing.dest
        if dest in kwargs:
            existing.default = kwargs[dest]
1270
+
1271
def get_default(self, dest):
    """Return the default for ``dest``.

    The first action with this dest and a non-None default wins;
    otherwise the parser-level default (or None) is returned.
    """
    for candidate in self._actions:
        if candidate.dest != dest:
            continue
        if candidate.default is None:
            continue
        return candidate.default
    return self._defaults.get(dest, None)
1276
+
1277
+
1278
+ # =======================
1279
+ # Adding argument actions
1280
+ # =======================
1281
def add_argument(self, *args, **kwargs):
    """
    add_argument(dest, ..., name=value, ...)
    add_argument(option_string, option_string, ..., name=value, ...)

    Build an action object from the given names and keyword arguments,
    hand it to ``self._add_action`` and return what that returns.

    Raises ValueError when ``dest`` is supplied both positionally and by
    keyword, when the resolved action class or type is not callable, or
    when formatting the argument raises TypeError (reported as a
    metavar/nargs mismatch).
    """

    # if no positional args are supplied or only one is supplied and
    # it doesn't look like an option string, parse a positional
    # argument
    chars = self.prefix_chars
    # NOTE: ``and`` binds tighter than ``or`` here, i.e. the test reads
    # "no names" or "exactly one name not starting with a prefix char"
    if not args or len(args) == 1 and args[0][0] not in chars:
        if args and 'dest' in kwargs:
            raise ValueError('dest supplied twice for positional argument')
        kwargs = self._get_positional_kwargs(*args, **kwargs)

    # otherwise, we're adding an optional argument
    else:
        kwargs = self._get_optional_kwargs(*args, **kwargs)

    # if no default was supplied, use the parser-level default
    if 'default' not in kwargs:
        dest = kwargs['dest']
        if dest in self._defaults:
            kwargs['default'] = self._defaults[dest]
        elif self.argument_default is not None:
            kwargs['default'] = self.argument_default

    # create the action object, and add it to the parser
    # (an unregistered action name comes back unchanged from the registry
    # lookup, so a bad string is caught by the callable() check)
    action_class = self._pop_action_class(kwargs)
    if not callable(action_class):
        raise ValueError('unknown action "%s"' % (action_class,))
    action = action_class(**kwargs)

    # raise an error if the action type is not callable
    type_func = self._registry_get('type', action.type, action.type)
    if not callable(type_func):
        raise ValueError('%r is not callable' % (type_func,))

    # raise an error if the metavar does not match the type
    # (only containers that provide _get_formatter can run this check)
    if hasattr(self, "_get_formatter"):
        try:
            self._get_formatter()._format_args(action, None)
        except TypeError:
            raise ValueError("length of metavar tuple does not match nargs")

    return self._add_action(action)
1327
+
1328
def add_argument_group(self, *args, **kwargs):
    """Create an argument group bound to this container, track it, return it."""
    new_group = _ArgumentGroup(self, *args, **kwargs)
    self._action_groups.append(new_group)
    return new_group
1332
+
1333
def add_mutually_exclusive_group(self, **kwargs):
    """Create a mutually exclusive group bound to this container, track it, return it."""
    new_group = _MutuallyExclusiveGroup(self, **kwargs)
    self._mutually_exclusive_groups.append(new_group)
    return new_group
1337
+
1338
+ def _add_action(self, action):
1339
+ # resolve any conflicts
1340
+ self._check_conflict(action)
1341
+
1342
+ # add to actions list
1343
+ self._actions.append(action)
1344
+ action.container = self
1345
+
1346
+ # index the action by any option strings it has
1347
+ for option_string in action.option_strings:
1348
+ self._option_string_actions[option_string] = action
1349
+
1350
+ # set the flag if any option strings look like negative numbers
1351
+ for option_string in action.option_strings:
1352
+ if self._negative_number_matcher.match(option_string):
1353
+ if not self._has_negative_number_optionals:
1354
+ self._has_negative_number_optionals.append(True)
1355
+
1356
+ # return the created action
1357
+ return action
1358
+
1359
+ def _remove_action(self, action):
1360
+ self._actions.remove(action)
1361
+
1362
+ def _add_container_actions(self, container):
1363
+ # collect groups by titles
1364
+ title_group_map = {}
1365
+ for group in self._action_groups:
1366
+ if group.title in title_group_map:
1367
+ msg = _('cannot merge actions - two groups are named %r')
1368
+ raise ValueError(msg % (group.title))
1369
+ title_group_map[group.title] = group
1370
+
1371
+ # map each action to its group
1372
+ group_map = {}
1373
+ for group in container._action_groups:
1374
+
1375
+ # if a group with the title exists, use that, otherwise
1376
+ # create a new group matching the container's group
1377
+ if group.title not in title_group_map:
1378
+ title_group_map[group.title] = self.add_argument_group(
1379
+ title=group.title,
1380
+ description=group.description,
1381
+ conflict_handler=group.conflict_handler)
1382
+
1383
+ # map the actions to their new group
1384
+ for action in group._group_actions:
1385
+ group_map[action] = title_group_map[group.title]
1386
+
1387
+ # add container's mutually exclusive groups
1388
+ # NOTE: if add_mutually_exclusive_group ever gains title= and
1389
+ # description= then this code will need to be expanded as above
1390
+ for group in container._mutually_exclusive_groups:
1391
+ mutex_group = self.add_mutually_exclusive_group(
1392
+ required=group.required)
1393
+
1394
+ # map the actions to their new mutex group
1395
+ for action in group._group_actions:
1396
+ group_map[action] = mutex_group
1397
+
1398
+ # add all actions to this container or their group
1399
+ for action in container._actions:
1400
+ group_map.get(action, self)._add_action(action)
1401
+
1402
def _get_positional_kwargs(self, dest, **kwargs):
    """Return the keyword arguments used to build a positional action.

    Raises TypeError if the caller passed ``required``; that flag is
    derived here from ``nargs`` and ``default`` instead.
    """
    if 'required' in kwargs:
        msg = _("'required' is an invalid argument for positionals")
        raise TypeError(msg)

    # a positional is required whenever it must consume at least one
    # string, and also for '*' when no default was given
    nargs = kwargs.get('nargs')
    needs_a_value = nargs not in [OPTIONAL, ZERO_OR_MORE]
    star_without_default = nargs == ZERO_OR_MORE and 'default' not in kwargs
    if needs_a_value or star_without_default:
        kwargs['required'] = True

    # positionals carry no option strings
    result = dict(kwargs)
    result['dest'] = dest
    result['option_strings'] = []
    return result
1417
+
1418
+ def _get_optional_kwargs(self, *args, **kwargs):
1419
+ # determine short and long option strings
1420
+ option_strings = []
1421
+ long_option_strings = []
1422
+ for option_string in args:
1423
+ # error on strings that don't start with an appropriate prefix
1424
+ if not option_string[0] in self.prefix_chars:
1425
+ args = {'option': option_string,
1426
+ 'prefix_chars': self.prefix_chars}
1427
+ msg = _('invalid option string %(option)r: '
1428
+ 'must start with a character %(prefix_chars)r')
1429
+ raise ValueError(msg % args)
1430
+
1431
+ # strings starting with two prefix characters are long options
1432
+ option_strings.append(option_string)
1433
+ if option_string[0] in self.prefix_chars:
1434
+ if len(option_string) > 1:
1435
+ if option_string[1] in self.prefix_chars:
1436
+ long_option_strings.append(option_string)
1437
+
1438
+ # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x'
1439
+ dest = kwargs.pop('dest', None)
1440
+ if dest is None:
1441
+ if long_option_strings:
1442
+ dest_option_string = long_option_strings[0]
1443
+ else:
1444
+ dest_option_string = option_strings[0]
1445
+ dest = dest_option_string.lstrip(self.prefix_chars)
1446
+ if not dest:
1447
+ msg = _('dest= is required for options like %r')
1448
+ raise ValueError(msg % option_string)
1449
+ dest = dest.replace('-', '_')
1450
+
1451
+ # return the updated keyword arguments
1452
+ return dict(kwargs, dest=dest, option_strings=option_strings)
1453
+
1454
+ def _pop_action_class(self, kwargs, default=None):
1455
+ action = kwargs.pop('action', default)
1456
+ return self._registry_get('action', action, action)
1457
+
1458
+ def _get_handler(self):
1459
+ # determine function from conflict handler string
1460
+ handler_func_name = '_handle_conflict_%s' % self.conflict_handler
1461
+ try:
1462
+ return getattr(self, handler_func_name)
1463
+ except AttributeError:
1464
+ msg = _('invalid conflict_resolution value: %r')
1465
+ raise ValueError(msg % self.conflict_handler)
1466
+
1467
+ def _check_conflict(self, action):
1468
+
1469
+ # find all options that conflict with this option
1470
+ confl_optionals = []
1471
+ for option_string in action.option_strings:
1472
+ if option_string in self._option_string_actions:
1473
+ confl_optional = self._option_string_actions[option_string]
1474
+ confl_optionals.append((option_string, confl_optional))
1475
+
1476
+ # resolve any conflicts
1477
+ if confl_optionals:
1478
+ conflict_handler = self._get_handler()
1479
+ conflict_handler(action, confl_optionals)
1480
+
1481
def _handle_conflict_error(self, action, conflicting_actions):
    """Report conflicting option strings by raising ArgumentError.

    ``action`` is the action being added; ``conflicting_actions`` is a
    list of ``(option_string, existing_action)`` pairs.

    Raises ArgumentError, attributed to ``action``, listing every
    conflicting option string.
    """
    message = ngettext('conflicting option string: %s',
                       'conflicting option strings: %s',
                       len(conflicting_actions))
    # The loop variable must not be named ``action``: on Python 2 list
    # comprehension variables leak into the enclosing scope and would
    # replace the parameter, attributing the error to the wrong action.
    conflict_string = ', '.join([option_string
                                 for option_string, conflicting_action
                                 in conflicting_actions])
    raise ArgumentError(action, message % conflict_string)
1489
+
1490
+ def _handle_conflict_resolve(self, action, conflicting_actions):
1491
+
1492
+ # remove all conflicting options
1493
+ for option_string, action in conflicting_actions:
1494
+
1495
+ # remove the conflicting option
1496
+ action.option_strings.remove(option_string)
1497
+ self._option_string_actions.pop(option_string, None)
1498
+
1499
+ # if the option now has no option string, remove it from the
1500
+ # container holding it
1501
+ if not action.option_strings:
1502
+ action.container._remove_action(action)
1503
+
1504
+
1505
class _ArgumentGroup(_ActionsContainer):
    """A titled group of actions that reuses its container's bookkeeping objects."""

    def __init__(self, container, title=None, description=None, **kwargs):
        # settings the caller left out are inherited from the container
        for setting in ('conflict_handler', 'prefix_chars',
                        'argument_default'):
            kwargs.setdefault(setting, getattr(container, setting))
        super(_ArgumentGroup, self).__init__(description=description,
                                             **kwargs)

        # state that belongs to the group itself
        self.title = title
        self._group_actions = []

        # everything else is the container's own object (not a copy), so
        # additions made through the group are visible to the container
        for shared in ('_registries',
                       '_actions',
                       '_option_string_actions',
                       '_defaults',
                       '_has_negative_number_optionals',
                       '_mutually_exclusive_groups'):
            setattr(self, shared, getattr(container, shared))

    def _add_action(self, action):
        """Add ``action`` through the base class and remember it as a group member."""
        added = super(_ArgumentGroup, self)._add_action(action)
        self._group_actions.append(added)
        return added

    def _remove_action(self, action):
        """Remove ``action`` through the base class, then from the group's member list."""
        super(_ArgumentGroup, self)._remove_action(action)
        self._group_actions.remove(action)
1537
+
1538
+
1539
class _MutuallyExclusiveGroup(_ArgumentGroup):
    """A group whose member actions are delegated to the owning container."""

    def __init__(self, container, required=False):
        super(_MutuallyExclusiveGroup, self).__init__(container)
        self._container = container
        self.required = required

    def _add_action(self, action):
        """Add a non-required ``action`` via the container and record it as a member.

        Raises ValueError if ``action.required`` is true.
        """
        if action.required:
            raise ValueError(
                _('mutually exclusive arguments must be optional'))
        added = self._container._add_action(action)
        self._group_actions.append(added)
        return added

    def _remove_action(self, action):
        """Remove ``action`` from the container, then from the member list."""
        self._container._remove_action(action)
        self._group_actions.remove(action)
1557
+
1558
+
1559
+ class ArgumentParser(_AttributeHolder, _ActionsContainer):
1560
+ """Object for parsing command line strings into Python objects.
1561
+
1562
+ Keyword Arguments:
1563
+ - prog -- The name of the program (default: sys.argv[0])
1564
+ - usage -- A usage message (default: auto-generated from arguments)
1565
+ - description -- A description of what the program does
1566
+ - epilog -- Text following the argument descriptions
1567
+ - parents -- Parsers whose arguments should be copied into this one
1568
+ - formatter_class -- HelpFormatter class for printing help messages
1569
+ - prefix_chars -- Characters that prefix optional arguments
1570
+ - fromfile_prefix_chars -- Characters that prefix files containing
1571
+ additional arguments
1572
+ - argument_default -- The default value for all arguments
1573
+ - conflict_handler -- String indicating how to handle conflicts
1574
+ - add_help -- Add a -h/--help option
1575
+ """
1576
+
1577
def __init__(self,
             prog=None,
             usage=None,
             description=None,
             epilog=None,
             version=None,
             parents=[],
             formatter_class=HelpFormatter,
             prefix_chars='-',
             fromfile_prefix_chars=None,
             argument_default=None,
             conflict_handler='error',
             add_help=True):
    """Set up the parser, its two default groups and its built-in options.

    Passing ``version`` emits a DeprecationWarning; the value is still
    stored and, when truthy, a -v/--version option is added.
    """
    # NOTE: the mutable default for ``parents`` is only iterated below,
    # never modified, so sharing it between calls has no effect here.

    if version is not None:
        import warnings
        warnings.warn(
            """The "version" argument to ArgumentParser is deprecated. """
            """Please use """
            """"add_argument(..., action='version', version="N", ...)" """
            """instead""", DeprecationWarning)

    superinit = super(ArgumentParser, self).__init__
    superinit(description=description,
              prefix_chars=prefix_chars,
              argument_default=argument_default,
              conflict_handler=conflict_handler)

    # default setting for prog
    if prog is None:
        prog = _os.path.basename(_sys.argv[0])

    self.prog = prog
    self.usage = usage
    self.epilog = epilog
    self.version = version
    self.formatter_class = formatter_class
    self.fromfile_prefix_chars = fromfile_prefix_chars
    self.add_help = add_help

    # the two groups every action is routed into by _add_action()
    add_group = self.add_argument_group
    self._positionals = add_group(_('positional arguments'))
    self._optionals = add_group(_('optional arguments'))
    self._subparsers = None

    # register types
    # (type=None maps to a function that returns the string unchanged)
    def identity(string):
        return string
    self.register('type', None, identity)

    # add help and version arguments if necessary
    # (using explicit default to override global argument_default)
    # '-' is used for the built-in options when it is a prefix character,
    # otherwise the first configured prefix character
    default_prefix = '-' if '-' in prefix_chars else prefix_chars[0]
    if self.add_help:
        self.add_argument(
            default_prefix+'h', default_prefix*2+'help',
            action='help', default=SUPPRESS,
            help=_('show this help message and exit'))
    if self.version:
        self.add_argument(
            default_prefix+'v', default_prefix*2+'version',
            action='version', default=SUPPRESS,
            version=self.version,
            help=_("show program's version number and exit"))

    # add parent arguments and defaults
    for parent in parents:
        self._add_container_actions(parent)
        # a parent without a _defaults attribute contributes no defaults
        try:
            defaults = parent._defaults
        except AttributeError:
            pass
        else:
            self._defaults.update(defaults)
1651
+
1652
+ # =======================
1653
+ # Pretty __repr__ methods
1654
+ # =======================
1655
+ def _get_kwargs(self):
1656
+ names = [
1657
+ 'prog',
1658
+ 'usage',
1659
+ 'description',
1660
+ 'version',
1661
+ 'formatter_class',
1662
+ 'conflict_handler',
1663
+ 'add_help',
1664
+ ]
1665
+ return [(name, getattr(self, name)) for name in names]
1666
+
1667
+ # ==================================
1668
+ # Optional/Positional adding methods
1669
+ # ==================================
1670
def add_subparsers(self, **kwargs):
    """Create the sub-parsers action, attach it to a group and return it.

    Calls ``self.error`` if a sub-parsers action was already created.
    """
    if self._subparsers is not None:
        self.error(_('cannot have multiple subparser arguments'))

    # sub-parsers are built with this parser's own class unless told otherwise
    kwargs.setdefault('parser_class', type(self))

    # a title or description asks for a dedicated group; otherwise the
    # action joins the regular positionals group
    wants_own_group = 'title' in kwargs or 'description' in kwargs
    if not wants_own_group:
        self._subparsers = self._positionals
    else:
        title = _(kwargs.pop('title', 'subcommands'))
        description = _(kwargs.pop('description', None))
        self._subparsers = self.add_argument_group(title, description)

    # prog defaults to the usage message of this parser, skipping
    # optional arguments and with no "usage:" prefix
    if kwargs.get('prog') is None:
        usage_formatter = self._get_formatter()
        current_positionals = self._get_positional_actions()
        mutex_groups = self._mutually_exclusive_groups
        usage_formatter.add_usage(
            self.usage, current_positionals, mutex_groups, '')
        kwargs['prog'] = usage_formatter.format_help().strip()

    # build the action and file it under the chosen group
    action_factory = self._pop_action_class(kwargs, 'parsers')
    subparsers_action = action_factory(option_strings=[], **kwargs)
    self._subparsers._add_action(subparsers_action)

    return subparsers_action
1700
+
1701
+ def _add_action(self, action):
1702
+ if action.option_strings:
1703
+ self._optionals._add_action(action)
1704
+ else:
1705
+ self._positionals._add_action(action)
1706
+ return action
1707
+
1708
+ def _get_optional_actions(self):
1709
+ return [action
1710
+ for action in self._actions
1711
+ if action.option_strings]
1712
+
1713
+ def _get_positional_actions(self):
1714
+ return [action
1715
+ for action in self._actions
1716
+ if not action.option_strings]
1717
+
1718
+ # =====================================
1719
+ # Command line argument parsing methods
1720
+ # =====================================
1721
def parse_args(self, args=None, namespace=None):
    """Parse ``args`` and return the namespace.

    Any argument strings left unrecognized are reported through
    ``self.error``.
    """
    parsed, leftovers = self.parse_known_args(args, namespace)
    if leftovers:
        self.error(_('unrecognized arguments: %s') % ' '.join(leftovers))
    return parsed
1727
+
1728
def parse_known_args(self, args=None, namespace=None):
    """Parse ``args`` and return ``(namespace, unrecognized_strings)``.

    ``args`` falls back to ``sys.argv[1:]`` and ``namespace`` to a fresh
    Namespace. An ArgumentError raised while parsing is reported through
    ``self.error``.
    """
    if args is None:
        args = _sys.argv[1:]
    if namespace is None:
        namespace = Namespace()

    # seed the namespace with action defaults that are not already set
    for action in self._actions:
        if action.dest is SUPPRESS:
            continue
        if hasattr(namespace, action.dest):
            continue
        if action.default is SUPPRESS:
            continue
        value = action.default
        # string defaults go through the same conversion as parsed values
        if isinstance(action.default, str):
            value = self._get_value(action, value)
        setattr(namespace, action.dest, value)

    # then with parser-level defaults that are still missing
    for dest in self._defaults:
        if hasattr(namespace, dest):
            continue
        setattr(namespace, dest, self._defaults[dest])

    # parse the arguments and exit if there are any errors
    try:
        namespace, args = self._parse_known_args(args, namespace)
        # strings parked on the namespace under the unrecognized-args
        # attribute are folded into the returned list and removed
        if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR):
            args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR))
            delattr(namespace, _UNRECOGNIZED_ARGS_ATTR)
        return namespace, args
    except ArgumentError:
        failure = _sys.exc_info()[1]
        self.error(str(failure))
1762
+
1763
def _parse_known_args(self, arg_strings, namespace):
    """Run the actions selected by ``arg_strings`` against ``namespace``.

    Returns ``(namespace, extras)`` where ``extras`` lists the argument
    strings that were not consumed. Missing required arguments or
    required mutually exclusive groups are reported via ``self.error``;
    conflicts and stray explicit arguments raise ArgumentError.
    """
    # replace arg strings that are file references
    if self.fromfile_prefix_chars is not None:
        arg_strings = self._read_args_from_files(arg_strings)

    # map all mutually exclusive arguments to the other arguments
    # they can't occur with
    action_conflicts = {}
    for mutex_group in self._mutually_exclusive_groups:
        group_actions = mutex_group._group_actions
        for i, mutex_action in enumerate(mutex_group._group_actions):
            conflicts = action_conflicts.setdefault(mutex_action, [])
            conflicts.extend(group_actions[:i])
            conflicts.extend(group_actions[i + 1:])

    # find all option indices, and determine the arg_string_pattern
    # which has an 'O' if there is an option at an index,
    # an 'A' if there is an argument, or a '-' if there is a '--'
    option_string_indices = {}
    arg_string_pattern_parts = []
    arg_strings_iter = iter(arg_strings)
    for i, arg_string in enumerate(arg_strings_iter):

        # all args after -- are non-options
        # (the inner loop drains the same iterator, which also ends the
        # outer loop)
        if arg_string == '--':
            arg_string_pattern_parts.append('-')
            for arg_string in arg_strings_iter:
                arg_string_pattern_parts.append('A')

        # otherwise, add the arg to the arg strings
        # and note the index if it was an option
        else:
            option_tuple = self._parse_optional(arg_string)
            if option_tuple is None:
                pattern = 'A'
            else:
                option_string_indices[i] = option_tuple
                pattern = 'O'
            arg_string_pattern_parts.append(pattern)

    # join the pieces together to form the pattern
    arg_strings_pattern = ''.join(arg_string_pattern_parts)

    # converts arg strings to the appropriate values and then takes the
    # action
    seen_actions = set()
    seen_non_default_actions = set()

    def take_action(action, argument_strings, option_string=None):
        seen_actions.add(action)
        argument_values = self._get_values(action, argument_strings)

        # error if this argument is not allowed with other previously
        # seen arguments, assuming that actions that use the default
        # value don't really count as "present"
        # (identity comparison: only the default object itself counts)
        if argument_values is not action.default:
            seen_non_default_actions.add(action)
            for conflict_action in action_conflicts.get(action, []):
                if conflict_action in seen_non_default_actions:
                    msg = _('not allowed with argument %s')
                    action_name = _get_action_name(conflict_action)
                    raise ArgumentError(action, msg % action_name)

        # take the action if we didn't receive a SUPPRESS value
        # (e.g. from a default)
        if argument_values is not SUPPRESS:
            action(self, namespace, argument_values, option_string)

    # function to convert arg_strings into an optional action
    # (``extras`` used below is the list bound further down, before the
    # first call to this function)
    def consume_optional(start_index):

        # get the optional identified at this index
        option_tuple = option_string_indices[start_index]
        action, option_string, explicit_arg = option_tuple

        # identify additional optionals in the same arg string
        # (e.g. -xyz is the same as -x -y -z if no args are required)
        match_argument = self._match_argument
        action_tuples = []
        while True:

            # if we found no optional action, skip it
            if action is None:
                extras.append(arg_strings[start_index])
                return start_index + 1

            # if there is an explicit argument, try to match the
            # optional's string arguments to only this
            if explicit_arg is not None:
                arg_count = match_argument(action, 'A')

                # if the action is a single-dash option and takes no
                # arguments, try to parse more single-dash options out
                # of the tail of the option string
                chars = self.prefix_chars
                if arg_count == 0 and option_string[1] not in chars:
                    action_tuples.append((action, [], option_string))
                    char = option_string[0]
                    option_string = char + explicit_arg[0]
                    new_explicit_arg = explicit_arg[1:] or None
                    optionals_map = self._option_string_actions
                    if option_string in optionals_map:
                        action = optionals_map[option_string]
                        explicit_arg = new_explicit_arg
                    else:
                        msg = _('ignored explicit argument %r')
                        raise ArgumentError(action, msg % explicit_arg)

                # if the action expect exactly one argument, we've
                # successfully matched the option; exit the loop
                elif arg_count == 1:
                    stop = start_index + 1
                    args = [explicit_arg]
                    action_tuples.append((action, args, option_string))
                    break

                # error if a double-dash option did not use the
                # explicit argument
                else:
                    msg = _('ignored explicit argument %r')
                    raise ArgumentError(action, msg % explicit_arg)

            # if there is no explicit argument, try to match the
            # optional's string arguments with the following strings
            # if successful, exit the loop
            else:
                start = start_index + 1
                selected_patterns = arg_strings_pattern[start:]
                arg_count = match_argument(action, selected_patterns)
                stop = start + arg_count
                args = arg_strings[start:stop]
                action_tuples.append((action, args, option_string))
                break

        # add the Optional to the list and return the index at which
        # the Optional's string args stopped
        assert action_tuples
        for action, args, option_string in action_tuples:
            take_action(action, args, option_string)
        return stop

    # the list of Positionals left to be parsed; this is modified
    # by consume_positionals()
    positionals = self._get_positional_actions()

    # function to convert arg_strings into positional actions
    def consume_positionals(start_index):
        # match as many Positionals as possible
        match_partial = self._match_arguments_partial
        selected_pattern = arg_strings_pattern[start_index:]
        arg_counts = match_partial(positionals, selected_pattern)

        # slice off the appropriate arg strings for each Positional
        # and add the Positional and its args to the list
        for action, arg_count in zip(positionals, arg_counts):
            args = arg_strings[start_index: start_index + arg_count]
            start_index += arg_count
            take_action(action, args)

        # slice off the Positionals that we just parsed and return the
        # index at which the Positionals' string args stopped
        # (slice assignment mutates the shared list in place)
        positionals[:] = positionals[len(arg_counts):]
        return start_index

    # consume Positionals and Optionals alternately, until we have
    # passed the last option string
    extras = []
    start_index = 0
    if option_string_indices:
        max_option_string_index = max(option_string_indices)
    else:
        max_option_string_index = -1
    while start_index <= max_option_string_index:

        # consume any Positionals preceding the next option
        next_option_string_index = min([
            index
            for index in option_string_indices
            if index >= start_index])
        if start_index != next_option_string_index:
            positionals_end_index = consume_positionals(start_index)

            # only try to parse the next optional if we didn't consume
            # the option string during the positionals parsing
            if positionals_end_index > start_index:
                start_index = positionals_end_index
                continue
            else:
                start_index = positionals_end_index

        # if we consumed all the positionals we could and we're not
        # at the index of an option string, there were extra arguments
        if start_index not in option_string_indices:
            strings = arg_strings[start_index:next_option_string_index]
            extras.extend(strings)
            start_index = next_option_string_index

        # consume the next optional and any arguments for it
        start_index = consume_optional(start_index)

    # consume any positionals following the last Optional
    stop_index = consume_positionals(start_index)

    # if we didn't consume all the argument strings, there were extras
    extras.extend(arg_strings[stop_index:])

    # make sure all required actions were present
    required_actions = [_get_action_name(action) for action in self._actions
                        if action.required and action not in seen_actions]
    if required_actions:
        self.error(_('the following arguments are required: %s') %
                   ', '.join(required_actions))

    # make sure all required groups had one option present
    for group in self._mutually_exclusive_groups:
        if group.required:
            for action in group._group_actions:
                if action in seen_non_default_actions:
                    break

            # if no actions were used, report the error
            # (this ``else`` belongs to the ``for``: it runs only when the
            # loop finished without hitting ``break``)
            else:
                names = [_get_action_name(action)
                         for action in group._group_actions
                         if action.help is not SUPPRESS]
                msg = _('one of the arguments %s is required')
                self.error(msg % ' '.join(names))

    # return the updated namespace and the extra arguments
    return namespace, extras
1992
+
1993
+ def _read_args_from_files(self, arg_strings):
1994
+ # expand arguments referencing files
1995
+ new_arg_strings = []
1996
+ for arg_string in arg_strings:
1997
+
1998
+ # for regular arguments, just add them back into the list
1999
+ if arg_string[0] not in self.fromfile_prefix_chars:
2000
+ new_arg_strings.append(arg_string)
2001
+
2002
+ # replace arguments referencing files with the file content
2003
+ else:
2004
+ try:
2005
+ args_file = open(arg_string[1:])
2006
+ try:
2007
+ arg_strings = []
2008
+ for arg_line in args_file.read().splitlines():
2009
+ for arg in self.convert_arg_line_to_args(arg_line):
2010
+ arg_strings.append(arg)
2011
+ arg_strings = self._read_args_from_files(arg_strings)
2012
+ new_arg_strings.extend(arg_strings)
2013
+ finally:
2014
+ args_file.close()
2015
+ except IOError:
2016
+ err = _sys.exc_info()[1]
2017
+ self.error(str(err))
2018
+
2019
+ # return the modified argument list
2020
+ return new_arg_strings
2021
+
2022
def convert_arg_line_to_args(self, arg_line):
    """Turn one line of an arguments file into a list of arguments.

    Here the whole line is treated as a single argument.
    """
    single_argument = [arg_line]
    return single_argument
2024
+
2025
+ def _match_argument(self, action, arg_strings_pattern):
2026
+ # match the pattern for this action to the arg strings
2027
+ nargs_pattern = self._get_nargs_pattern(action)
2028
+ match = _re.match(nargs_pattern, arg_strings_pattern)
2029
+
2030
+ # raise an exception if we weren't able to find a match
2031
+ if match is None:
2032
+ nargs_errors = {
2033
+ None: _('expected one argument'),
2034
+ OPTIONAL: _('expected at most one argument'),
2035
+ ONE_OR_MORE: _('expected at least one argument'),
2036
+ }
2037
+ default = ngettext('expected %s argument',
2038
+ 'expected %s arguments',
2039
+ action.nargs) % action.nargs
2040
+ msg = nargs_errors.get(action.nargs, default)
2041
+ raise ArgumentError(action, msg)
2042
+
2043
+ # return the number of arguments matched
2044
+ return len(match.group(1))
2045
+
2046
+ def _match_arguments_partial(self, actions, arg_strings_pattern):
2047
+ # progressively shorten the actions list by slicing off the
2048
+ # final actions until we find a match
2049
+ result = []
2050
+ for i in range(len(actions), 0, -1):
2051
+ actions_slice = actions[:i]
2052
+ pattern = ''.join([self._get_nargs_pattern(action)
2053
+ for action in actions_slice])
2054
+ match = _re.match(pattern, arg_strings_pattern)
2055
+ if match is not None:
2056
+ result.extend([len(string) for string in match.groups()])
2057
+ break
2058
+
2059
+ # return the list of arg string counts
2060
+ return result
2061
+
2062
def _parse_optional(self, arg_string):
    """Classify one argument string as a positional or an optional.

    Returns None when the string is to be treated as a positional, or a
    tuple ``(action, option_string, explicit_arg)`` otherwise. ``action``
    is None in that tuple when the string looks like an option that this
    parser does not define. An ambiguous match is reported through
    ``self.error``.

    The checks below are order-dependent: exact matches win over
    '='-splitting, which wins over the tuples from _get_option_tuples.
    """
    # if it's an empty string, it was meant to be a positional
    if not arg_string:
        return None

    # if it doesn't start with a prefix, it was meant to be positional
    if not arg_string[0] in self.prefix_chars:
        return None

    # if the option string is present in the parser, return the action
    if arg_string in self._option_string_actions:
        action = self._option_string_actions[arg_string]
        return action, arg_string, None

    # if it's just a single character, it was meant to be positional
    if len(arg_string) == 1:
        return None

    # if the option string before the "=" is present, return the action
    if '=' in arg_string:
        option_string, explicit_arg = arg_string.split('=', 1)
        if option_string in self._option_string_actions:
            action = self._option_string_actions[option_string]
            return action, option_string, explicit_arg

    # search through all possible prefixes of the option string
    # and all actions in the parser for possible interpretations
    option_tuples = self._get_option_tuples(arg_string)

    # if multiple actions match, the option string was ambiguous
    if len(option_tuples) > 1:
        options = ', '.join([option_string
            for action, option_string, explicit_arg in option_tuples])
        args = {'option': arg_string, 'matches': options}
        msg = _('ambiguous option: %(option)s could match %(matches)s')
        self.error(msg % args)

    # if exactly one action matched, this segmentation is good,
    # so return the parsed action
    elif len(option_tuples) == 1:
        option_tuple, = option_tuples
        return option_tuple

    # if it was not found as an option, but it looks like a negative
    # number, it was meant to be positional
    # unless there are negative-number-like options
    if self._negative_number_matcher.match(arg_string):
        if not self._has_negative_number_optionals:
            return None

    # if it contains a space, it was meant to be a positional
    if ' ' in arg_string:
        return None

    # it was meant to be an optional but there is no such option
    # in this parser (though it might be a valid option in a subparser)
    return None, arg_string, None
2119
+
2120
+ def _get_option_tuples(self, option_string):
2121
+ result = []
2122
+
2123
+ # option strings starting with two prefix characters are only
2124
+ # split at the '='
2125
+ chars = self.prefix_chars
2126
+ if option_string[0] in chars and option_string[1] in chars:
2127
+ if '=' in option_string:
2128
+ option_prefix, explicit_arg = option_string.split('=', 1)
2129
+ else:
2130
+ option_prefix = option_string
2131
+ explicit_arg = None
2132
+ for option_string in self._option_string_actions:
2133
+ if option_string.startswith(option_prefix):
2134
+ action = self._option_string_actions[option_string]
2135
+ tup = action, option_string, explicit_arg
2136
+ result.append(tup)
2137
+
2138
+ # single character options can be concatenated with their arguments
2139
+ # but multiple character options always have to have their argument
2140
+ # separate
2141
+ elif option_string[0] in chars and option_string[1] not in chars:
2142
+ option_prefix = option_string
2143
+ explicit_arg = None
2144
+ short_option_prefix = option_string[:2]
2145
+ short_explicit_arg = option_string[2:]
2146
+
2147
+ for option_string in self._option_string_actions:
2148
+ if option_string == short_option_prefix:
2149
+ action = self._option_string_actions[option_string]
2150
+ tup = action, option_string, short_explicit_arg
2151
+ result.append(tup)
2152
+ elif option_string.startswith(option_prefix):
2153
+ action = self._option_string_actions[option_string]
2154
+ tup = action, option_string, explicit_arg
2155
+ result.append(tup)
2156
+
2157
+ # shouldn't ever get here
2158
+ else:
2159
+ self.error(_('unexpected option string: %s') % option_string)
2160
+
2161
+ # return the collected option tuples
2162
+ return result
2163
+
2164
+ def _get_nargs_pattern(self, action):
2165
+ # in all examples below, we have to allow for '--' args
2166
+ # which are represented as '-' in the pattern
2167
+ nargs = action.nargs
2168
+
2169
+ # the default (None) is assumed to be a single argument
2170
+ if nargs is None:
2171
+ nargs_pattern = '(-*A-*)'
2172
+
2173
+ # allow zero or one arguments
2174
+ elif nargs == OPTIONAL:
2175
+ nargs_pattern = '(-*A?-*)'
2176
+
2177
+ # allow zero or more arguments
2178
+ elif nargs == ZERO_OR_MORE:
2179
+ nargs_pattern = '(-*[A-]*)'
2180
+
2181
+ # allow one or more arguments
2182
+ elif nargs == ONE_OR_MORE:
2183
+ nargs_pattern = '(-*A[A-]*)'
2184
+
2185
+ # allow any number of options or arguments
2186
+ elif nargs == REMAINDER:
2187
+ nargs_pattern = '([-AO]*)'
2188
+
2189
+ # allow one argument followed by any number of options or arguments
2190
+ elif nargs == PARSER:
2191
+ nargs_pattern = '(-*A[-AO]*)'
2192
+
2193
+ # all others should be integers
2194
+ else:
2195
+ nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs)
2196
+
2197
+ # if this is an optional action, -- is not allowed
2198
+ if action.option_strings:
2199
+ nargs_pattern = nargs_pattern.replace('-*', '')
2200
+ nargs_pattern = nargs_pattern.replace('-', '')
2201
+
2202
+ # return the pattern
2203
+ return nargs_pattern
2204
+
2205
+ # ========================
2206
+ # Value conversion methods
2207
+ # ========================
2208
+ def _get_values(self, action, arg_strings):
2209
+ # for everything but PARSER args, strip out '--'
2210
+ if action.nargs not in [PARSER, REMAINDER]:
2211
+ arg_strings = [s for s in arg_strings if s != '--']
2212
+
2213
+ # optional argument produces a default when not present
2214
+ if not arg_strings and action.nargs == OPTIONAL:
2215
+ if action.option_strings:
2216
+ value = action.const
2217
+ else:
2218
+ value = action.default
2219
+ if isinstance(value, str):
2220
+ value = self._get_value(action, value)
2221
+ self._check_value(action, value)
2222
+
2223
+ # when nargs='*' on a positional, if there were no command-line
2224
+ # args, use the default if it is anything other than None
2225
+ elif (not arg_strings and action.nargs == ZERO_OR_MORE and
2226
+ not action.option_strings):
2227
+ if action.default is not None:
2228
+ value = action.default
2229
+ else:
2230
+ value = arg_strings
2231
+ self._check_value(action, value)
2232
+
2233
+ # single argument or optional argument produces a single value
2234
+ elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]:
2235
+ arg_string, = arg_strings
2236
+ value = self._get_value(action, arg_string)
2237
+ self._check_value(action, value)
2238
+
2239
+ # REMAINDER arguments convert all values, checking none
2240
+ elif action.nargs == REMAINDER:
2241
+ value = [self._get_value(action, v) for v in arg_strings]
2242
+
2243
+ # PARSER arguments convert all values, but check only the first
2244
+ elif action.nargs == PARSER:
2245
+ value = [self._get_value(action, v) for v in arg_strings]
2246
+ self._check_value(action, value[0])
2247
+
2248
+ # all other types of nargs produce a list
2249
+ else:
2250
+ value = [self._get_value(action, v) for v in arg_strings]
2251
+ for v in value:
2252
+ self._check_value(action, v)
2253
+
2254
+ # return the converted value
2255
+ return value
2256
+
2257
+ def _get_value(self, action, arg_string):
2258
+ type_func = self._registry_get('type', action.type, action.type)
2259
+ if not callable(type_func):
2260
+ msg = _('%r is not callable')
2261
+ raise ArgumentError(action, msg % type_func)
2262
+
2263
+ # convert the value to the appropriate type
2264
+ try:
2265
+ result = type_func(arg_string)
2266
+
2267
+ # ArgumentTypeErrors indicate errors
2268
+ except ArgumentTypeError:
2269
+ name = getattr(action.type, '__name__', repr(action.type))
2270
+ msg = str(_sys.exc_info()[1])
2271
+ raise ArgumentError(action, msg)
2272
+
2273
+ # TypeErrors or ValueErrors also indicate errors
2274
+ except (TypeError, ValueError):
2275
+ name = getattr(action.type, '__name__', repr(action.type))
2276
+ args = {'type': name, 'value': arg_string}
2277
+ msg = _('invalid %(type)s value: %(value)r')
2278
+ raise ArgumentError(action, msg % args)
2279
+
2280
+ # return the converted value
2281
+ return result
2282
+
2283
+ def _check_value(self, action, value):
2284
+ # converted value must be one of the choices (if specified)
2285
+ if action.choices is not None and value not in action.choices:
2286
+ args = {'value': value,
2287
+ 'choices': ', '.join(map(repr, action.choices))}
2288
+ msg = _('invalid choice: %(value)r (choose from %(choices)s)')
2289
+ raise ArgumentError(action, msg % args)
2290
+
2291
+ # =======================
2292
+ # Help-formatting methods
2293
+ # =======================
2294
def format_usage(self):
    """Return the usage text produced by a fresh formatter."""
    usage_formatter = self._get_formatter()
    usage_formatter.add_usage(
        self.usage,
        self._actions,
        self._mutually_exclusive_groups,
    )
    return usage_formatter.format_help()
2299
+
2300
def format_help(self):
    """Return the full help text produced by a fresh formatter.

    Sections are added in order: usage, description, one section per
    action group, then the epilog.
    """
    help_formatter = self._get_formatter()

    # usage line followed by the parser description
    help_formatter.add_usage(
        self.usage, self._actions, self._mutually_exclusive_groups)
    help_formatter.add_text(self.description)

    # positionals, optionals and user-defined groups
    for group in self._action_groups:
        help_formatter.start_section(group.title)
        help_formatter.add_text(group.description)
        help_formatter.add_arguments(group._group_actions)
        help_formatter.end_section()

    # epilog comes last
    help_formatter.add_text(self.epilog)

    # render everything collected above
    return help_formatter.format_help()
2322
+
2323
def format_version(self):
    """Return ``self.version`` rendered by a fresh formatter.

    Deprecated: emits a DeprecationWarning on every call.
    """
    import warnings
    deprecation_note = (
        'The format_version method is deprecated -- the "version" '
        'argument to ArgumentParser is no longer supported.')
    warnings.warn(deprecation_note, DeprecationWarning)

    version_formatter = self._get_formatter()
    version_formatter.add_text(self.version)
    return version_formatter.format_help()
2332
+
2333
+ def _get_formatter(self):
2334
+ return self.formatter_class(prog=self.prog)
2335
+
2336
+ # =====================
2337
+ # Help-printing methods
2338
+ # =====================
2339
def print_usage(self, file=None):
    """Write the usage text to *file*, defaulting to standard output."""
    target = _sys.stdout if file is None else file
    self._print_message(self.format_usage(), target)
2343
+
2344
def print_help(self, file=None):
    """Write the full help text to *file*, defaulting to standard output."""
    target = _sys.stdout if file is None else file
    self._print_message(self.format_help(), target)
2348
+
2349
def print_version(self, file=None):
    """Write the formatted version text via ``_print_message``.

    Deprecated: emits a DeprecationWarning on every call.  *file* is
    passed through unchanged, so None is resolved by ``_print_message``.
    """
    import warnings
    deprecation_note = (
        'The print_version method is deprecated -- the "version" '
        'argument to ArgumentParser is no longer supported.')
    warnings.warn(deprecation_note, DeprecationWarning)

    version_text = self.format_version()
    self._print_message(version_text, file)
2356
+
2357
+ def _print_message(self, message, file=None):
2358
+ if message:
2359
+ if file is None:
2360
+ file = _sys.stderr
2361
+ file.write(message)
2362
+
2363
+ # ===============
2364
+ # Exiting methods
2365
+ # ===============
2366
def exit(self, status=0, message=None):
    """Optionally print *message* to standard error, then exit.

    Termination goes through ``_sys.exit(status)``.
    """
    if message:
        error_stream = _sys.stderr
        self._print_message(message, error_stream)
    _sys.exit(status)
2370
+
2371
def error(self, message):
    """error(message: string)

    Writes the usage text to stderr, then calls ``self.exit`` with
    status 2 and a "<prog>: error: <message>" line.

    Subclasses that override this must not return from it -- they
    should either exit or raise an exception.
    """
    self.print_usage(_sys.stderr)
    text = _('%(prog)s: error: %(message)s\n') % {
        'prog': self.prog,
        'message': message,
    }
    self.exit(2, text)
mosesdecoder/contrib/tmcombine/test/model3/model/lex.counts.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 500 1000
2
+ bd bf 5 10
3
+ der le 20285 102586
4
+ der NULL 12926 704917
5
+ gipfel sommet 3485 7322
6
+ pass col 419 2911
7
+ pass passeport 7 28
8
+ sitzung séance 14 59
mosesdecoder/contrib/tmcombine/test/model3/model/lex.counts.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 500 1000
2
+ bf bd 5 10
3
+ col pass 419 615
4
+ le der 20285 113635
5
+ passeport pass 7 615
6
+ retrouvé NULL 34 1016136
7
+ séance sitzung 14 33
8
+ sommet gipfel 3485 5700
mosesdecoder/contrib/tmcombine/test/model3/model/lex.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 0.5
2
+ bd bf 0.5
3
+ der le 0.1977365
4
+ der NULL 0.0183369
5
+ gipfel sommet 0.4759629
6
+ pass col 0.1439368
7
+ pass passeport 0.2500000
8
+ sitzung séance 0.2372881
mosesdecoder/contrib/tmcombine/test/model3/model/lex.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 0.5
2
+ bf bd 0.5
3
+ col pass 0.6813008
4
+ le der 0.1785101
5
+ passeport pass 0.0113821
6
+ retrouvé NULL 0.0000335
7
+ séance sitzung 0.4242424
8
+ sommet gipfel 0.6114035
mosesdecoder/contrib/tmcombine/test/model3/model/phrase-table ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000 ||| sparse_feature 1
2
+ bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10 |||
3
+ der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518
4
+ der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749 45
5
+ pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582
6
+ pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.25 0.00171821 3.813e-07 2.718 ||| 0-0 ||| 2 582
7
+ pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15 582
8
+ sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 ||| 22 17
mosesdecoder/contrib/tmcombine/test/model5/model/lex.counts.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 500 1000
2
+ bd bf 5 10
3
+ der le 20285 102586
4
+ der NULL 12926 704917
5
+ gipfel sommet 3485 7322
6
+ pass col 419 2911
7
+ pass passeport 7 28
8
+ sitzung séance 14 59
mosesdecoder/contrib/tmcombine/test/model5/model/lex.counts.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 500 1000
2
+ bf bd 5 10
3
+ col pass 419 615
4
+ le der 20285 113635
5
+ passeport pass 7 615
6
+ retrouvé NULL 34 1016136
7
+ séance sitzung 14 33
8
+ sommet gipfel 3485 5700
mosesdecoder/contrib/tmcombine/test/model5/model/lex.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 0.5
2
+ bd bf 0.5
3
+ der le 0.1977365
4
+ der NULL 0.0183369
5
+ gipfel sommet 0.4759629
6
+ pass col 0.1439368
7
+ pass passeport 0.2500000
8
+ sitzung séance 0.2372881
mosesdecoder/contrib/tmcombine/test/model5/model/lex.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 0.5
2
+ bf bd 0.5
3
+ col pass 0.6813008
4
+ le der 0.1785101
5
+ passeport pass 0.0113821
6
+ retrouvé NULL 0.0000335
7
+ séance sitzung 0.4242424
8
+ sommet gipfel 0.6114035
mosesdecoder/contrib/tmcombine/test/model5/model/phrase-table ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad [X][X] [X] ||| af [X][X] [X] ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 1-1 ||| 1000 1000
2
+ bd [X] ||| bf [X] ||| 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10
3
+ der gipfel [X] ||| sommet [X] ||| 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518
4
+ der [X][X] pass [X] ||| le [X][X] col [X] ||| 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 2-2 ||| 749 45
5
+ pass [X] ||| col [X] ||| 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582
6
+ pass [X] ||| passeport retrouvé [X] ||| 0.5 0.25 0.00171821 3.813e-07 2.718 ||| 0-0 ||| 2 582
7
+ pass [X] ||| passeport [X] ||| 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15 582
8
+ [X][X] sitzung [X] ||| [X][X] séance [X] ||| 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 1-1 ||| 22 17
mosesdecoder/contrib/tmcombine/test/model6/model/lex.counts.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 100 1000
2
+ bd bf 1 10
3
+ der le 150181 944391
4
+ der NULL 54483 3595140
5
+ gipfel sommet 3421 9342
6
+ pass col 2 70
7
+ pass passeport 73 379
8
+ sitzung séance 3441 5753
mosesdecoder/contrib/tmcombine/test/model6/model/lex.counts.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 100 1000
2
+ bf bd 1 10
3
+ col pass 2 108
4
+ le der 150181 1356104
5
+ passeport pass 73 108
6
+ retrouvé NULL 43 6276240
7
+ séance sitzung 3441 6142
8
+ sommet gipfel 3421 4908
mosesdecoder/contrib/tmcombine/test/model6/model/lex.e2f ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad af 0.1
2
+ bd bf 0.1
3
+ der le 0.1590242
4
+ der NULL 0.0151546
5
+ gipfel sommet 0.366195
6
+ pass col 0.0285714
7
+ pass passeport 0.1926121
8
+ sitzung séance 0.5981227
mosesdecoder/contrib/tmcombine/test/model6/model/lex.f2e ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ af ad 0.1
2
+ bf bd 0.1
3
+ col pass 0.0185185
4
+ le der 0.1107445
5
+ passeport pass 0.6759259
6
+ retrouvé NULL 0.0000069
7
+ séance sitzung 0.5602410
8
+ sommet gipfel 0.6970253
mosesdecoder/contrib/tmcombine/test/model6/model/phrase-table ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ad [X][X] [X] ||| af [X][X] [X] ||| 0.1 0.1 0.1 0.1 2.718 ||| 0-0 1-1 ||| 1000 1000
2
+ bd [X] ||| bf [X] ||| 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 10 10
3
+ der [X][X] pass [X] ||| le [X][X] passeport [X] ||| 0.16 0.03063 0.4 0.0748551 2.718 ||| 0-0 1-1 2-2 ||| 25 10
4
+ pass [X] ||| passeport [X] ||| 0.28022 0.192612 0.607143 0.675926 2.718 ||| 0-0 ||| 182 84
5
+ [X][X] sitzung [X] ||| [X][X] séance [X] ||| 0.784521 0.598123 0.516654 0.560241 2.718 ||| 0-0 1-1 ||| 4251 6455
mosesdecoder/contrib/tmcombine/test/phrase-table_test1 ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ad ||| af ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 1000 1000
2
+ bd ||| bf ||| 0.3 0.3 0.3 0.3 ||| 0-0 ||| 10 10
3
+ der gipfel ||| sommet ||| 0.00163568 0.00436384 0.0183397 0.305702 ||| 1-0 ||| 5808 518
4
+ der pass ||| le col ||| 0.00867825 0.0142308 0.144445 0.0608095 ||| 0-0 1-1 ||| 749 45
5
+ pass ||| col ||| 0.0976 0.0719685 0.314433 0.340651 ||| 0-0 ||| 1875 582
6
+ pass ||| passeport retrouvé ||| 0.25 0.125 0.000859105 1.9065e-07 ||| 0-0 ||| 2 582
7
+ pass ||| passeport ||| 0.273444 0.221306 0.307008 0.343654 ||| 0-0 ||| 15 582
8
+ sitzung ||| séance ||| 0.528624 0.417705 0.434797 0.492241 ||| 0-0 ||| 22 17
mosesdecoder/contrib/tmcombine/test/phrase-table_test10 ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ad ||| af ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 25362.6029089 1074.23173673 ||| sparse_feature 1
2
+ bd ||| bf ||| 0.3 0.3 0.3 0.3 0.115771 0.35574 0.472359 0.469238 ||| 0-0 ||| 253.626029089 10.7423173673 |||
3
+ der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 ||| 1-0 ||| 5808.0 518.0
4
+ der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 ||| 0-0 1-1 ||| 749.0 45.7423173673
5
+ der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 ||| 0-0 1-1 ||| 609.065072723 45.7423173673
6
+ pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 ||| 0-0 ||| 1875.0 588.235465885
7
+ pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 ||| 0-0 ||| 2.0 588.235465885
8
+ pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 ||| 0-0 ||| 4448.99372942 588.235465885
9
+ sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 ||| 0-0 ||| 103587.424966 496.165860589