sleepyhead111 commited on
Commit
36ceee4
·
verified ·
1 Parent(s): cd33cd3

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. fairseq-0.10.2/fairseq_cli/preprocess.py +398 -0
  2. fairseq-0.10.2/fairseq_cli/score.py +96 -0
  3. fairseq-0.10.2/tests/test_backtranslation_dataset.py +123 -0
  4. fairseq-0.10.2/tests/test_constraints.py +269 -0
  5. fairseq-0.10.2/tests/test_convtbc.py +54 -0
  6. fairseq-0.10.2/tests/test_inference_dropout.py +66 -0
  7. fairseq-0.10.2/tests/test_metrics.py +77 -0
  8. fairseq-0.10.2/tests/test_sequence_scorer.py +120 -0
  9. fairseq-0.10.2/tests/test_token_block_dataset.py +79 -0
  10. mosesdecoder/.beautify-ignore +38 -0
  11. mosesdecoder/.travis.yml +24 -0
  12. mosesdecoder/azure-pipelines.yml +100 -0
  13. mosesdecoder/biconcor/Jamfile +2 -0
  14. mosesdecoder/biconcor/PhrasePairCollection.cpp +209 -0
  15. mosesdecoder/biconcor/PhrasePairCollection.h +46 -0
  16. mosesdecoder/biconcor/SuffixArray.h +82 -0
  17. mosesdecoder/biconcor/biconcor.cpp +171 -0
  18. mosesdecoder/chk.tmp +1 -0
  19. mosesdecoder/doxygen.conf +1781 -0
  20. mosesdecoder/moses-cmd/Jamfile +7 -0
  21. mosesdecoder/moses-cmd/LatticeMBRGrid.cpp +215 -0
  22. mosesdecoder/moses-cmd/Main.cpp +33 -0
  23. mosesdecoder/moses-cmd/MainVW.cpp +186 -0
  24. mosesdecoder/moses-cmd/MainVW.h +42 -0
  25. mosesdecoder/moses2/AlignmentInfo.h +148 -0
  26. mosesdecoder/moses2/AlignmentInfoCollection.cpp +62 -0
  27. mosesdecoder/moses2/ArcLists.cpp +127 -0
  28. mosesdecoder/moses2/Array.h +85 -0
  29. mosesdecoder/moses2/EstimatedScores.h +59 -0
  30. mosesdecoder/moses2/HypothesisBase.cpp +81 -0
  31. mosesdecoder/moses2/HypothesisBase.h +76 -0
  32. mosesdecoder/moses2/HypothesisColl.cpp +286 -0
  33. mosesdecoder/moses2/HypothesisColl.h +75 -0
  34. mosesdecoder/moses2/InputPathsBase.h +54 -0
  35. mosesdecoder/moses2/Main.cpp +116 -0
  36. mosesdecoder/moses2/Main.h +23 -0
  37. mosesdecoder/moses2/ManagerBase.cpp +53 -0
  38. mosesdecoder/moses2/ManagerBase.h +81 -0
  39. mosesdecoder/moses2/MemPool.h +77 -0
  40. mosesdecoder/moses2/MemPoolAllocator.h +85 -0
  41. mosesdecoder/moses2/Moses2Wrapper.h +30 -0
  42. mosesdecoder/moses2/Phrase.h +146 -0
  43. mosesdecoder/moses2/Recycler.cpp +13 -0
  44. mosesdecoder/moses2/Scores.h +81 -0
  45. mosesdecoder/moses2/SubPhrase.h +54 -0
  46. mosesdecoder/moses2/TargetPhrase.cpp +15 -0
  47. mosesdecoder/moses2/TranslationTask.cpp +65 -0
  48. mosesdecoder/moses2/TrellisPaths.h +64 -0
  49. mosesdecoder/moses2/TypeDef.h +125 -0
  50. mosesdecoder/moses2/Vector.cpp +14 -0
fairseq-0.10.2/fairseq_cli/preprocess.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ #
4
+ # This source code is licensed under the MIT license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ """
7
+ Data pre-processing: build vocabularies and binarize training data.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import shutil
13
+ import sys
14
+ from collections import Counter
15
+ from itertools import zip_longest
16
+ from multiprocessing import Pool
17
+
18
+ from fairseq import options, tasks, utils
19
+ from fairseq.binarizer import Binarizer
20
+ from fairseq.data import indexed_dataset
21
+
22
+
23
+ logging.basicConfig(
24
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
25
+ datefmt="%Y-%m-%d %H:%M:%S",
26
+ level=os.environ.get("LOGLEVEL", "INFO").upper(),
27
+ stream=sys.stdout,
28
+ )
29
+ logger = logging.getLogger("fairseq_cli.preprocess")
30
+
31
+
32
def main(args):
    """Build vocabularies and binarize train/valid/test data.

    Reads plain-text (parallel, or monolingual with --only-source) corpora,
    builds or loads the source/target dictionaries, and writes binarized
    datasets plus the dictionaries (and optionally binarized alignments and a
    best-alignment word dictionary) into ``args.destdir``.
    """
    utils.import_user_module(args)

    os.makedirs(args.destdir, exist_ok=True)

    # Mirror all log output into <destdir>/preprocess.log.
    logger.addHandler(
        logging.FileHandler(
            filename=os.path.join(args.destdir, "preprocess.log"),
        )
    )
    logger.info(args)

    task = tasks.get_task(args.task)

    def train_path(lang):
        # e.g. trainpref "train" + lang "en" -> "train.en"; no suffix when lang is falsy.
        return "{}{}".format(args.trainpref, ("." + lang) if lang else "")

    def file_name(prefix, lang):
        fname = prefix
        if lang is not None:
            fname += ".{lang}".format(lang=lang)
        return fname

    def dest_path(prefix, lang):
        return os.path.join(args.destdir, file_name(prefix, lang))

    def dict_path(lang):
        return dest_path("dict", lang) + ".txt"

    def build_dictionary(filenames, src=False, tgt=False):
        # Exactly one of src/tgt must be set; it selects the matching
        # threshold/nwords options.
        assert src ^ tgt
        return task.build_dictionary(
            filenames,
            workers=args.workers,
            threshold=args.thresholdsrc if src else args.thresholdtgt,
            nwords=args.nwordssrc if src else args.nwordstgt,
            padding_factor=args.padding_factor,
        )

    target = not args.only_source

    # Refuse to clobber dictionaries already in destdir unless one was
    # supplied explicitly on the command line.
    if not args.srcdict and os.path.exists(dict_path(args.source_lang)):
        raise FileExistsError(dict_path(args.source_lang))
    if target and not args.tgtdict and os.path.exists(dict_path(args.target_lang)):
        raise FileExistsError(dict_path(args.target_lang))

    if args.joined_dictionary:
        assert (
            not args.srcdict or not args.tgtdict
        ), "cannot use both --srcdict and --tgtdict with --joined-dictionary"

        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        elif args.tgtdict:
            src_dict = task.load_dictionary(args.tgtdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            # One shared dictionary built over both sides' training data.
            src_dict = build_dictionary(
                {train_path(lang) for lang in [args.source_lang, args.target_lang]},
                src=True,
            )
        tgt_dict = src_dict
    else:
        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            src_dict = build_dictionary([train_path(args.source_lang)], src=True)

        if target:
            if args.tgtdict:
                tgt_dict = task.load_dictionary(args.tgtdict)
            else:
                assert (
                    args.trainpref
                ), "--trainpref must be set if --tgtdict is not specified"
                tgt_dict = build_dictionary([train_path(args.target_lang)], tgt=True)
        else:
            tgt_dict = None

    src_dict.save(dict_path(args.source_lang))
    if target and tgt_dict is not None:
        tgt_dict.save(dict_path(args.target_lang))

    def make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers):
        """Binarize one split of one language, fanning chunks out to workers."""
        logger.info("[{}] Dictionary: {} types".format(lang, len(vocab)))
        n_seq_tok = [0, 0]  # [num sequences, num tokens]; mutated by merge_result
        replaced = Counter()  # tokens replaced by <unk>, keyed by surface form

        def merge_result(worker_result):
            replaced.update(worker_result["replaced"])
            n_seq_tok[0] += worker_result["nseq"]
            n_seq_tok[1] += worker_result["ntok"]

        input_file = "{}{}".format(
            input_prefix, ("." + lang) if lang is not None else ""
        )
        # Byte offsets that split the input into num_workers chunks.
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            # Workers 1..N-1 binarize their chunk into temp shards; this
            # process binarizes chunk 0 inline below.
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize,
                    (
                        args,
                        input_file,
                        vocab,
                        prefix,
                        lang,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, lang, "bin"),
            impl=args.dataset_impl,
            vocab_size=len(vocab),
        )
        merge_result(
            Binarizer.binarize(
                input_file, vocab, lambda t: ds.add_item(t), offset=0, end=offsets[1]
            )
        )
        if num_workers > 1:
            pool.join()
            # Append each worker's temp shard in order, then delete it.
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, lang)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))

        logger.info(
            "[{}] {}: {} sents, {} tokens, {:.3}% replaced by {}".format(
                lang,
                input_file,
                n_seq_tok[0],
                n_seq_tok[1],
                100 * sum(replaced.values()) / n_seq_tok[1],
                vocab.unk_word,
            )
        )

    def make_binary_alignment_dataset(input_prefix, output_prefix, num_workers):
        """Binarize one alignment file (same fan-out scheme as above)."""
        nseq = [0]

        def merge_result(worker_result):
            nseq[0] += worker_result["nseq"]

        input_file = input_prefix
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize_alignments,
                    (
                        args,
                        input_file,
                        utils.parse_alignment,
                        prefix,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, None, "bin"), impl=args.dataset_impl
        )

        merge_result(
            Binarizer.binarize_alignments(
                input_file,
                utils.parse_alignment,
                lambda t: ds.add_item(t),
                offset=0,
                end=offsets[1],
            )
        )
        if num_workers > 1:
            pool.join()
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, None)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, None, "idx"))

        logger.info("[alignments] {}: parsed {} alignments".format(input_file, nseq[0]))

    def make_dataset(vocab, input_prefix, output_prefix, lang, num_workers=1):
        if args.dataset_impl == "raw":
            # Copy original text file to destination folder
            output_text_file = dest_path(
                output_prefix + ".{}-{}".format(args.source_lang, args.target_lang),
                lang,
            )
            shutil.copyfile(file_name(input_prefix, lang), output_text_file)
        else:
            make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers)

    def make_all(lang, vocab):
        # Process train plus any comma-separated list of valid/test prefixes;
        # extra prefixes become valid1/valid2/... and test1/test2/...
        if args.trainpref:
            make_dataset(vocab, args.trainpref, "train", lang, num_workers=args.workers)
        if args.validpref:
            for k, validpref in enumerate(args.validpref.split(",")):
                outprefix = "valid{}".format(k) if k > 0 else "valid"
                make_dataset(
                    vocab, validpref, outprefix, lang, num_workers=args.workers
                )
        if args.testpref:
            for k, testpref in enumerate(args.testpref.split(",")):
                outprefix = "test{}".format(k) if k > 0 else "test"
                make_dataset(vocab, testpref, outprefix, lang, num_workers=args.workers)

    def make_all_alignments():
        # Alignment files are optional: only binarized when present on disk.
        if args.trainpref and os.path.exists(args.trainpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.trainpref + "." + args.align_suffix,
                "train.align",
                num_workers=args.workers,
            )
        if args.validpref and os.path.exists(args.validpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.validpref + "." + args.align_suffix,
                "valid.align",
                num_workers=args.workers,
            )
        if args.testpref and os.path.exists(args.testpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.testpref + "." + args.align_suffix,
                "test.align",
                num_workers=args.workers,
            )

    make_all(args.source_lang, src_dict)
    if target:
        make_all(args.target_lang, tgt_dict)
    if args.align_suffix:
        make_all_alignments()

    logger.info("Wrote preprocessed data to {}".format(args.destdir))

    if args.alignfile:
        # Build a word-alignment dictionary: for each source word, the target
        # word it is most frequently aligned to in the training data.
        assert args.trainpref, "--trainpref must be set if --alignfile is specified"
        src_file_name = train_path(args.source_lang)
        tgt_file_name = train_path(args.target_lang)
        freq_map = {}
        with open(args.alignfile, "r", encoding="utf-8") as align_file:
            with open(src_file_name, "r", encoding="utf-8") as src_file:
                with open(tgt_file_name, "r", encoding="utf-8") as tgt_file:
                    for a, s, t in zip_longest(align_file, src_file, tgt_file):
                        si = src_dict.encode_line(s, add_if_not_exist=False)
                        ti = tgt_dict.encode_line(t, add_if_not_exist=False)
                        # Each alignment token is a "srcpos-tgtpos" pair.
                        ai = list(map(lambda x: tuple(x.split("-")), a.split()))
                        for sai, tai in ai:
                            srcidx = si[int(sai)]
                            tgtidx = ti[int(tai)]
                            # Skip pairs involving <unk>; special symbols must
                            # never appear inside a sentence body.
                            if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk():
                                assert srcidx != src_dict.pad()
                                assert srcidx != src_dict.eos()
                                assert tgtidx != tgt_dict.pad()
                                assert tgtidx != tgt_dict.eos()

                                if srcidx not in freq_map:
                                    freq_map[srcidx] = {}
                                if tgtidx not in freq_map[srcidx]:
                                    freq_map[srcidx][tgtidx] = 1
                                else:
                                    freq_map[srcidx][tgtidx] += 1

        # Keep only the most frequent target word per source word.
        align_dict = {}
        for srcidx in freq_map.keys():
            align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get)

        with open(
            os.path.join(
                args.destdir,
                "alignment.{}-{}.txt".format(args.source_lang, args.target_lang),
            ),
            "w",
            encoding="utf-8",
        ) as f:
            for k, v in align_dict.items():
                print("{} {}".format(src_dict[k], tgt_dict[v]), file=f)
336
def binarize(args, filename, vocab, output_prefix, lang, offset, end, append_eos=True):
    """Binarize the ``[offset, end)`` byte range of ``filename`` into one shard.

    Writes a ``.bin``/``.idx`` pair under the destination prefix and returns
    the statistics dict produced by ``Binarizer.binarize``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, lang, "bin"),
        impl=args.dataset_impl,
        vocab_size=len(vocab),
    )
    stats = Binarizer.binarize(
        filename,
        vocab,
        builder.add_item,
        append_eos=append_eos,
        offset=offset,
        end=end,
    )
    builder.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))
    return stats
351
+
352
+
353
def binarize_alignments(args, filename, parse_alignment, output_prefix, offset, end):
    """Binarize the ``[offset, end)`` byte range of an alignment file into one shard.

    Writes a ``.bin``/``.idx`` pair (no vocabulary) and returns the statistics
    dict produced by ``Binarizer.binarize_alignments``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, None, "bin"),
        impl=args.dataset_impl,
        vocab_size=None,
    )
    stats = Binarizer.binarize_alignments(
        filename, parse_alignment, builder.add_item, offset=offset, end=end
    )
    builder.finalize(dataset_dest_file(args, output_prefix, None, "idx"))
    return stats
368
+
369
+
370
def dataset_dest_prefix(args, output_prefix, lang):
    """Return the destination path prefix for a binarized split.

    The prefix is ``<destdir>/<output_prefix>`` optionally followed by a
    ``.src-tgt`` language-pair part and, when ``lang`` is given, the language
    itself (e.g. ``out/train.en-de.en``).
    """
    base = "{}/{}".format(args.destdir, output_prefix)
    if lang is not None:
        suffix = ".{}-{}.{}".format(args.source_lang, args.target_lang, lang)
    elif args.only_source:
        # Monolingual data: no language-pair decoration.
        suffix = ""
    else:
        suffix = ".{}-{}".format(args.source_lang, args.target_lang)
    return base + suffix
380
+
381
+
382
def dataset_dest_file(args, output_prefix, lang, extension):
    """Return the full destination path for a split file, e.g. ``....bin`` or ``....idx``."""
    return "{}.{}".format(dataset_dest_prefix(args, output_prefix, lang), extension)
385
+
386
+
387
def get_offsets(input_file, num_workers):
    """Thin wrapper: byte offsets splitting ``input_file`` into ``num_workers`` chunks."""
    offsets = Binarizer.find_offsets(input_file, num_workers)
    return offsets
389
+
390
+
391
def cli_main():
    """Command-line entry point: parse preprocessing args and run :func:`main`."""
    parser = options.get_preprocessing_parser()
    main(parser.parse_args())


if __name__ == "__main__":
    cli_main()
fairseq-0.10.2/fairseq_cli/score.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ #
4
+ # This source code is licensed under the MIT license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ """
7
+ BLEU scoring of generated translations against reference translations.
8
+ """
9
+
10
+ import argparse
11
+ import os
12
+ import sys
13
+
14
+ from fairseq.data import dictionary
15
+ from fairseq.scoring import bleu
16
+
17
+
18
def get_parser():
    """Build the argument parser for the BLEU-scoring command-line tool."""
    p = argparse.ArgumentParser(
        description="Command-line script for BLEU scoring."
    )
    p.add_argument("-s", "--sys", default="-", help="system output")
    p.add_argument("-r", "--ref", required=True, help="references")
    p.add_argument(
        "-o",
        "--order",
        type=int,
        default=4,
        metavar="N",
        help="consider ngrams up to this order",
    )
    p.add_argument(
        "--ignore-case",
        action="store_true",
        help="case-insensitive scoring",
    )
    p.add_argument(
        "--sacrebleu",
        action="store_true",
        help="score with sacrebleu",
    )
    p.add_argument(
        "--sentence-bleu",
        action="store_true",
        help="report sentence-level BLEUs (i.e., with +1 smoothing)",
    )
    return p
35
+
36
+
37
def cli_main():
    """Parse command-line arguments and score system output against references.

    Depending on flags, scores with sacrebleu, per-sentence smoothed BLEU, or
    corpus BLEU; results are printed to stdout. The system output is read from
    stdin when ``--sys -`` (the default).
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == "-" or os.path.exists(
        args.sys
    ), "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref)

    # Renamed from `dict`, which shadowed the builtin of the same name.
    token_dict = dictionary.Dictionary()

    def readlines(fd):
        # Iterate lazily instead of materializing fd.readlines();
        # lower-case when case-insensitive scoring was requested.
        for line in fd:
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))

    elif args.sentence_bleu:

        def score(fdsys):
            # One smoothed BLEU score per line, reset between sentences.
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(token_dict.pad(), token_dict.eos(), token_dict.unk())
                for i, (sys_tok, ref_tok) in enumerate(
                    zip(readlines(fdsys), readlines(fdref))
                ):
                    scorer.reset(one_init=True)
                    sys_tok = token_dict.encode_line(sys_tok)
                    ref_tok = token_dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                    print(i, scorer.result_string(args.order))

    else:

        def score(fdsys):
            # Single corpus-level BLEU score accumulated over all lines.
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(token_dict.pad(), token_dict.eos(), token_dict.unk())
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = token_dict.encode_line(sys_tok)
                    ref_tok = token_dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == "-":
        score(sys.stdin)
    else:
        with open(args.sys, "r") as f:
            score(f)


if __name__ == "__main__":
    cli_main()
fairseq-0.10.2/tests/test_backtranslation_dataset.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import tests.utils as test_utils
9
+ import torch
10
+ from fairseq.data import (
11
+ BacktranslationDataset,
12
+ LanguagePairDataset,
13
+ TransformEosDataset,
14
+ )
15
+ from fairseq.sequence_generator import SequenceGenerator
16
+
17
+
18
class TestBacktranslationDataset(unittest.TestCase):
    """Tests for BacktranslationDataset, which wraps a target-side dataset and
    generates synthetic source sentences with a SequenceGenerator."""

    def setUp(self):
        # Shared fixture: a small dictionary, two word ids, sample tokens and
        # a toy model from the common test-utils sequence-generator setup.
        (
            self.tgt_dict,
            self.w1,
            self.w2,
            self.src_tokens,
            self.src_lengths,
            self.model,
        ) = test_utils.sequence_generator_setup()

        dummy_src_samples = self.src_tokens

        self.tgt_dataset = test_utils.TestDataset(data=dummy_src_samples)
        self.cuda = torch.cuda.is_available()

    def _backtranslation_dataset_helper(
        self,
        remove_eos_from_input_src,
        remove_eos_from_output_src,
    ):
        """Build a BacktranslationDataset with the given EOS handling, take one
        batch, and compare generated source/target against expected tensors."""
        tgt_dataset = LanguagePairDataset(
            src=self.tgt_dataset,
            src_sizes=self.tgt_dataset.sizes,
            src_dict=self.tgt_dict,
            tgt=None,
            tgt_sizes=None,
            tgt_dict=None,
        )

        generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.tgt_dict,
            max_len_a=0,
            max_len_b=200,
            beam_size=2,
            unk_penalty=0,
        )

        backtranslation_dataset = BacktranslationDataset(
            tgt_dataset=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # remove eos from the input src
                remove_eos_from_src=remove_eos_from_input_src,
            ),
            src_dict=self.tgt_dict,
            backtranslation_fn=(
                lambda sample: generator.generate([self.model], sample)
            ),
            output_collater=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # if we remove eos from the input src, then we need to add it
                # back to the output tgt
                append_eos_to_tgt=remove_eos_from_input_src,
                remove_eos_from_src=remove_eos_from_output_src,
            ).collater,
            cuda=self.cuda,
        )
        dataloader = torch.utils.data.DataLoader(
            backtranslation_dataset,
            batch_size=2,
            collate_fn=backtranslation_dataset.collater,
        )
        backtranslation_batch_result = next(iter(dataloader))

        eos, pad, w1, w2 = self.tgt_dict.eos(), self.tgt_dict.pad(), self.w1, self.w2

        # Note that we sort by src_lengths and add left padding, so actually
        # ids will look like: [1, 0]
        expected_src = torch.LongTensor([[w1, w2, w1, eos], [pad, pad, w1, eos]])
        if remove_eos_from_output_src:
            expected_src = expected_src[:, :-1]
        expected_tgt = torch.LongTensor([[w1, w2, eos], [w1, w2, eos]])
        generated_src = backtranslation_batch_result["net_input"]["src_tokens"]
        tgt_tokens = backtranslation_batch_result["target"]

        self.assertTensorEqual(expected_src, generated_src)
        self.assertTensorEqual(expected_tgt, tgt_tokens)

    def test_backtranslation_dataset_no_eos_in_output_src(self):
        # EOS kept on input, stripped from the generated source.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=True,
        )

    def test_backtranslation_dataset_with_eos_in_output_src(self):
        # EOS kept on both input and generated source.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=False,
        )

    def test_backtranslation_dataset_no_eos_in_input_src(self):
        # EOS stripped from input; collater re-appends it to the output tgt.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=True,
            remove_eos_from_output_src=False,
        )

    def assertTensorEqual(self, t1, t2):
        """Assert two tensors have identical shape and elements."""
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)


if __name__ == "__main__":
    unittest.main()
fairseq-0.10.2/tests/test_constraints.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import sys
7
+ import unittest
8
+
9
+ import torch
10
+ from fairseq.token_generation_constraints import *
11
+
12
+
13
def tensorize(constraints: List[List[int]]) -> List[torch.Tensor]:
    """Convert a list of constraint token-id lists into a list of 1-D tensors.

    Fixed the return annotation: the function returns a *list* of tensors
    (one per constraint), not a single ``torch.Tensor``.
    """
    return [torch.tensor(x) for x in constraints]
15
+
16
+
17
class TestHelperRoutines(unittest.TestCase):
    """Tests for ``pack_constraints``, which flattens per-sentence constraint
    lists into a single padded batch tensor."""

    def setUp(self):
        # Pairs of (batch of per-sentence constraint lists, expected packed
        # tensor). In each packed row the first entry is the number of
        # constraints, followed by each constraint's tokens terminated by 0,
        # with trailing zero-padding to the batch-wide maximum row length.
        self.examples = [
            ([[]], torch.tensor([[0]])),
            ([[], []], torch.tensor([[0], [0]])),
            ([[torch.tensor([1, 2])], []], torch.tensor([[1, 1, 2, 0], [0, 0, 0, 0]])),
            (
                [
                    [
                        torch.tensor([3, 1, 2]),
                        torch.tensor([3]),
                        torch.tensor([4, 5, 6, 7]),
                    ],
                    [],
                    [torch.tensor([1, 8, 9, 10, 1, 4, 11, 12])],
                ],
                torch.tensor(
                    [
                        [3, 3, 1, 2, 0, 3, 0, 4, 5, 6, 7, 0],
                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                        [1, 1, 8, 9, 10, 1, 4, 11, 12, 0, 0, 0],
                    ]
                ),
            ),
        ]

    def test_packing(self):
        """Ensures the list of lists of tensors gets packed correctly."""
        for batch_constraints, expected_tensor in self.examples:
            packed = pack_constraints(batch_constraints)
            assert torch.equal(packed, expected_tensor)
48
+
49
+
50
class TestUnorderedConstraintState(unittest.TestCase):
    """Tests for UnorderedConstraintState, where constraints may be satisfied
    in any order via a trie of constraint tokens."""

    def setUp(self):
        # Tuples of (constraint set, expected printed graph, token counts per node)
        self.examples = [
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
                {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
            ),
            ([], "[None].False#0", {}),
            (tensorize([[0]]), "([None].False#1 [0].True#1)", {0: 1}),
            (
                tensorize([[100000, 1, 2, 3, 4, 5]]),
                "([None].False#1 ([100000].False#1 ([1].False#1 ([2].False#1 ([3].False#1 ([4].False#1 [5].True#1))))))",
                {100000: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                "([None].False#2 ([1].False#2 [2].True#2))",
                {1: 2, 2: 2},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                "([None].False#2 ([1].False#1 [2].True#1) ([3].False#1 [4].True#1))",
                {1: 1, 2: 1, 3: 1, 4: 1},
            ),
        ]

        # Tuples of (constraint set, token sequence to consume, expected
        # attribute values of the resulting state).
        self.sequences = [
            (
                self.examples[0][0],
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 94],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4, 999],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [4, 5, 6, 8],
                {"bank": 2, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                # Tricky, because in last three, goes down [1->4] branch, could miss [1] and [4->5]
                # [[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]],
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": True},
            ),
            (
                tensorize([[1], [2, 3]]),
                # Should not be able to get credit for entering 1 a second time
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
            (
                self.examples[5][0],
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
        ]

    def test_graphs(self):
        """
        Test whether unordered graph systems are created correctly.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            c = ConstraintNode.create(constraints)
            assert (
                ConstraintNode.print_graph(c) == expected
            ), f"got {ConstraintNode.print_graph(c)}, expected {expected}"
            assert (
                c.token_counts() == gold_counts
            ), f"{c} got {c.token_counts()} wanted {gold_counts}"

    def test_next_tokens(self):
        """
        Tests that the set of next tokens is correct.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            root = ConstraintNode.create(constraints)

            root_tokens = set(root.children.keys())
            for sequence in constraints:
                state = UnorderedConstraintState(root)
                for token in sequence:
                    # At every step the candidate set is the union of the
                    # root's children and the current node's children.
                    all_tokens = root_tokens.union(state.node.children.keys())
                    assert (
                        all_tokens == state.next_tokens()
                    ), f"ALL {all_tokens} NEXT {state.next_tokens()}"
                    state = state.advance(token)

    def test_sequences(self):
        """Each (constraints, tokens) pair must land in the expected state."""
        for constraints, tokens, expected in self.sequences:
            state = UnorderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)

            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
188
+
189
+
190
class TestOrderedConstraintState(unittest.TestCase):
    """Tests for OrderedConstraintState, where constraints must be satisfied
    strictly in the order given."""

    def setUp(self):
        # The same constraint set is shared by most cases; build it once
        # instead of repeating the identical literal eight times.
        shared = tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]])
        # Tuples of (constraint set, token sequence to consume, expected
        # attribute values of the resulting state).
        self.sequences = [
            (
                shared,
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 94],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 3, 999, 1, 4],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 2, 3, 999, 999],
                {"bank": 3, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 3, 77, 1, 3, 1],
                {"bank": 6, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                shared,
                [1, 2, 999, 1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1], [2, 3]]),
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
        ]

    def test_sequences(self):
        """Each (constraints, tokens) pair must land in the expected state."""
        for i, (constraints, tokens, expected) in enumerate(self.sequences):
            state = OrderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)
            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"


if __name__ == "__main__":
    unittest.main()
fairseq-0.10.2/tests/test_convtbc.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from fairseq.modules import ConvTBC
11
+
12
+
13
+ class TestConvTBC(unittest.TestCase):
14
+ def test_convtbc(self):
15
+ # ksz, in_channels, out_channels
16
+ conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1)
17
+ # out_channels, in_channels, ksz
18
+ conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1)
19
+
20
+ conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2))
21
+ conv_tbc.bias.data.copy_(conv1d.bias.data)
22
+
23
+ input_tbc = torch.randn(7, 2, 4, requires_grad=True)
24
+ input1d = input_tbc.data.transpose(0, 1).transpose(1, 2)
25
+ input1d.requires_grad = True
26
+
27
+ output_tbc = conv_tbc(input_tbc)
28
+ output1d = conv1d(input1d)
29
+
30
+ self.assertAlmostEqual(
31
+ output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data
32
+ )
33
+
34
+ grad_tbc = torch.randn(output_tbc.size())
35
+ grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous()
36
+
37
+ output_tbc.backward(grad_tbc)
38
+ output1d.backward(grad1d)
39
+
40
+ self.assertAlmostEqual(
41
+ conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data
42
+ )
43
+ self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data)
44
+ self.assertAlmostEqual(
45
+ input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data
46
+ )
47
+
48
+ def assertAlmostEqual(self, t1, t2):
49
+ self.assertEqual(t1.size(), t2.size(), "size mismatch")
50
+ self.assertLess((t1 - t2).abs().max(), 1e-4)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ unittest.main()
fairseq-0.10.2/tests/test_inference_dropout.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ import unittest
8
+
9
+ from fairseq.models.transformer import TransformerModel
10
+ from tests.test_sequence_generator import get_dummy_task_and_parser
11
+
12
+
13
class TestInferenceDropout(unittest.TestCase):
    """Verifies how TransformerModel.prepare_for_inference_ handles the
    --retain-dropout / --retain-dropout-modules options."""

    def setUp(self):
        self.task, self.parser = get_dummy_task_and_parser()
        TransformerModel.add_args(self.parser)
        self.args = self.parser.parse_args([])
        self.args.encoder_layers = 2
        self.args.decoder_layers = 1
        # silence model-construction logging for the duration of each test
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_sets_inference_dropout_to_true(self):
        self.args.retain_dropout = True
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        encoder = self.transformer_model.encoder
        assert encoder.dropout_module.apply_during_inference
        assert self.transformer_model.decoder.dropout_module.apply_during_inference
        for layer in encoder.layers:
            assert layer.dropout_module.apply_during_inference

    def test_inference_dropout_false_by_default(self):
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        model = self.transformer_model
        assert not model.encoder.dropout_module.apply_during_inference
        assert not model.decoder.dropout_module.apply_during_inference
        for layer in list(model.encoder.layers) + list(model.decoder.layers):
            assert not layer.dropout_module.apply_during_inference

    def test_applies_training_mode(self):
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        assert self.transformer_model.encoder.dropout_module.training
        for layer in self.transformer_model.encoder.layers:
            assert layer.dropout_module.training

        self.transformer_model.eval()
        assert not self.transformer_model.decoder.dropout_module.training
        for layer in self.transformer_model.encoder.layers:
            assert not layer.dropout_module.training

    def test_retain_modules(self):
        # dropout is retained only for the module classes listed by name
        self.args.retain_dropout = True
        self.args.retain_dropout_modules = [
            "TransformerEncoder",
            "TransformerEncoderLayer",
        ]
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        assert self.transformer_model.encoder.dropout_module.apply_during_inference
        assert not self.transformer_model.decoder.dropout_module.apply_during_inference
        for layer in self.transformer_model.decoder.layers:
            assert not layer.dropout_module.apply_during_inference
fairseq-0.10.2/tests/test_metrics.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+ import uuid
8
+
9
+ from fairseq import metrics
10
+
11
+
12
class TestMetrics(unittest.TestCase):
    """Exercises the fairseq.metrics aggregation API: nested contexts,
    new_root contexts, named aggregators, and duplicated names."""

    def test_nesting(self):
        # logging inside the inner context also feeds the outer aggregate
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as inner:
                metrics.log_scalar("loss", 2)

        self.assertEqual(outer.get_smoothed_values()["loss"], 1.5)
        self.assertEqual(inner.get_smoothed_values()["loss"], 2)

    def test_new_root(self):
        # new_root=True detaches the inner context from the outer one
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as detached:
                metrics.log_scalar("loss", 2)

        self.assertEqual(outer.get_smoothed_values()["loss"], 1)
        self.assertEqual(detached.get_smoothed_values()["loss"], 2)

    def test_nested_new_root(self):
        with metrics.aggregate() as layer1:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as layer2:
                metrics.log_scalar("loss", 2)
                with metrics.aggregate() as layer3:
                    metrics.log_scalar("loss", 3)
                    with metrics.aggregate(new_root=True) as layer4:
                        metrics.log_scalar("loss", 4)
            # logged after layer2 closed: reaches layer1 only
            metrics.log_scalar("loss", 1.5)

        self.assertEqual(layer4.get_smoothed_values()["loss"], 4)
        self.assertEqual(layer3.get_smoothed_values()["loss"], 3)
        self.assertEqual(layer2.get_smoothed_values()["loss"], 2.5)
        self.assertEqual(layer1.get_smoothed_values()["loss"], 1.25)

    def test_named(self):
        # a named aggregator accumulates across separate `with` blocks and
        # ignores logging done outside them
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)

        metrics.log_scalar("loss", 3)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 2)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 1.5)

    def test_nested_duplicate_names(self):
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as other:
                with metrics.aggregate(name):
                    metrics.log_scalar("loss", 2)
            metrics.log_scalar("loss", 6)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 3)
        self.assertEqual(other.get_smoothed_values()["loss"], 2)
74
+
75
+
76
+ if __name__ == "__main__":
77
+ unittest.main()
fairseq-0.10.2/tests/test_sequence_scorer.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import argparse
7
+ import unittest
8
+
9
+ import tests.utils as test_utils
10
+ import torch
11
+ from fairseq.sequence_scorer import SequenceScorer
12
+
13
+
14
class TestSequenceScorer(unittest.TestCase):
    """Scores fixed reference targets with a scripted dummy model and checks
    the tokens and (positional and aggregate) scores of the hypotheses."""

    def test_sequence_scorer(self):
        # construct dummy dictionary
        d = test_utils.dummy_dictionary(vocab_size=2)
        self.assertEqual(d.pad(), 1)
        self.assertEqual(d.eos(), 2)
        self.assertEqual(d.unk(), 3)
        eos = d.eos()
        w1 = 4
        w2 = 5

        # construct dataloader
        data = [
            {
                "source": torch.LongTensor([w1, w2, eos]),
                "target": torch.LongTensor([w1, w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, eos]),
            },
        ]
        data_itr = test_utils.dummy_dataloader(data)

        # specify expected output probabilities
        # beam_probs[t][i] is the scripted model distribution at decoding
        # step t for sentence i (columns: eos, unk, w1, w2)
        args = argparse.Namespace()
        unk = 0.0
        args.beam_probs = [
            # step 0:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.0, unk, 0.6, 0.4],  # sentence 1
                    [0.0, unk, 0.4, 0.6],  # sentence 2
                    [0.0, unk, 0.7, 0.3],  # sentence 3
                ]
            ),
            # step 1:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.0, unk, 0.2, 0.7],  # sentence 1
                    [0.0, unk, 0.8, 0.2],  # sentence 2
                    [0.7, unk, 0.1, 0.2],  # sentence 3
                ]
            ),
            # step 2:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.10, unk, 0.50, 0.4],  # sentence 1
                    [0.15, unk, 0.15, 0.7],  # sentence 2
                    [0.00, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
            # step 3:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.9, unk, 0.05, 0.05],  # sentence 1
                    [0.0, unk, 0.00, 0.0],  # sentence 2
                    [0.0, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
        ]
        # per-token probabilities read off beam_probs along each target
        expected_scores = [
            [0.6, 0.7, 0.5, 0.9],  # sentence 1
            [0.6, 0.8, 0.15],  # sentence 2
            [0.3, 0.7],  # sentence 3
        ]

        task = test_utils.TestTranslationTask.setup_task(args, d, d)
        model = task.build_model(args)
        scorer = SequenceScorer(task.target_dictionary)
        for sample in data_itr:
            hypos = task.inference_step(scorer, [model], sample)
            # samples may arrive in any order; map back via the "id" field
            for id, hypos_id in zip(sample["id"].tolist(), hypos):
                self.assertHypoTokens(hypos_id[0], data[id]["target"])
                self.assertHypoScore(hypos_id[0], expected_scores[id])

    def assertHypoTokens(self, hypo, tokens):
        """Assert the hypothesis token sequence equals *tokens*."""
        self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens))

    def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0):
        """Check positional scores against log(*pos_probs*) and the
        (optionally length-normalized) aggregate score."""
        pos_scores = torch.FloatTensor(pos_probs).log()
        self.assertAlmostEqual(hypo["positional_scores"], pos_scores)
        self.assertEqual(pos_scores.numel(), hypo["tokens"].numel())
        score = pos_scores.sum()
        if normalized:
            score /= pos_scores.numel() ** lenpen
        self.assertLess(abs(score - hypo["score"]), 1e-6)

    def assertAlmostEqual(self, t1, t2):
        # elementwise closeness: same size and max abs diff < 1e-4
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertLess((t1 - t2).abs().max(), 1e-4)

    def assertTensorEqual(self, t1, t2):
        # exact equality for integer tensors
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)
117
+
118
+
119
+ if __name__ == "__main__":
120
+ unittest.main()
fairseq-0.10.2/tests/test_token_block_dataset.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import tests.utils as test_utils
9
+ import torch
10
+ from fairseq.data import TokenBlockDataset
11
+
12
+
13
class TestTokenBlockDataset(unittest.TestCase):
    """Checks TokenBlockDataset blocking for the "eos", "none" and
    "complete" break modes."""

    def _build_dataset(self, data, **kwargs):
        """Wrap *data* in a TestDataset and index it with TokenBlockDataset."""
        return TokenBlockDataset(
            test_utils.TestDataset(data), [len(x) for x in data], **kwargs
        )

    def _assert_blocks(self, ds, expected):
        # compare every produced block against the expected token lists
        for idx, tokens in enumerate(expected):
            self.assertEqual(ds[idx].tolist(), tokens)

    def test_eos_break_mode(self):
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [1], [8, 7, 6, 1]])

        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [8, 7, 6, 1], [1]])

    def test_block_break_mode(self):
        # "none": sentences are concatenated and re-split at block_size
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=3, pad=0, eos=1, break_mode="none")
        self._assert_blocks(ds, [[5, 4, 3], [2, 1, 8], [7, 6, 1], [9, 1]])

    def test_complete_break_mode(self):
        # "complete": whole sentences are packed up to block_size
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=6, pad=0, eos=1, break_mode="complete"
        )
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [8, 7, 6, 1, 9, 1]])

        data = [
            torch.tensor([4, 3, 2, 1], dtype=torch.long),
            torch.tensor([5, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=3, pad=0, eos=1, break_mode="complete"
        )
        self._assert_blocks(ds, [[4, 3, 2, 1], [5, 1, 1], [6, 1]])
77
+
78
+ if __name__ == "__main__":
79
+ unittest.main()
mosesdecoder/.beautify-ignore ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Files and directories that beautify.py should not clean up.
2
+ #
3
+ # This file is not as advanced as, say, .gitignore. It only supports files
4
+ # and directory paths relative to the project root, one per line, no globs,
5
+ # no quotes.
6
+ #
7
+ # Leading and trailing whitespace is stripped from filenames, but internal
8
+ # whitespace is preserved.
9
+ #
10
+ # Lines starting with a hash mark, such as this one, are comments. The hash
11
+ # mark must be the first character on the line. Blank lines are ignored.
12
+ #
13
+ # The .beautify-ignore file must be encoded in UTF-8.
14
+
15
+ boost
16
+ contrib
17
+ irstlm
18
+ jam-files
19
+ lm
20
+ mingw/MosesGUI/icons_rc.py
21
+ mingw/MosesGUI/Ui_credits.py
22
+ mingw/MosesGUI/Ui_mainWindow.py
23
+ moses/TranslationModel/UG
24
+ moses/server
25
+ moses/parameters
26
+ moses/thread_safe_container.h
27
+ phrase-extract/pcfg-common
28
+ phrase-extract/syntax-common
29
+ randlm
30
+ # Filename suffixes in here are language codes, so e.g. ".pl" means
31
+ # Polish, not Perl.
32
+ scripts/share/nonbreaking_prefixes
33
+ search
34
+ srilm
35
+ util
36
+ xmlrpc-c
37
+ .git
38
+ util/ug_cache_with_timeout.h
mosesdecoder/.travis.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sudo: false
2
+ dist: trusty
3
+ language: c
4
+ compiler: gcc
5
+ env:
6
+ matrix:
7
+ addons:
8
+ apt:
9
+ sources:
10
+ - ubuntu-toolchain-r-test
11
+ packages:
12
+ - subversion
13
+ - automake
14
+ - libtool
15
+ - zlib1g-dev
16
+ - libbz2-dev
17
+ - liblzma-dev
18
+ - libboost-all-dev
19
+ - libgoogle-perftools-dev
20
+ - libxmlrpc-c++.*-dev
21
+ - cmake
22
+ - csh
23
+ script:
24
+ - ./bjam -j4
mosesdecoder/azure-pipelines.yml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Starter pipeline
2
+ # Start with a minimal pipeline that you can customize to build and deploy your code.
3
+ # Add steps that build, run tests, deploy, and more:
4
+ # https://aka.ms/yaml
5
+
6
+ trigger:
7
+ - master
8
+
9
+ pool:
10
+ #vmImage: 'ubuntu-latest'
11
+ vmImage: 'ubuntu-16.04'
12
+
13
+ steps:
14
+
15
+ - script: |
16
+ echo Printing some environment information
17
+ echo HOME: $HOME
18
+ echo
19
+ echo UBUNTU VERSION:
20
+ cat /etc/lsb-release
21
+ echo
22
+ echo CPU INFO
23
+ cat /proc/cpuinfo
24
+ echo
25
+ echo MEM INFO
26
+ cat /proc/meminfo
27
+ echo
28
+ echo DISK INFO
29
+ df -h
30
+ echo
31
+ echo PWD: $PWD
32
+ echo
33
+ ls
34
+ displayName: 'Printing some environment information'
35
+
36
+
37
+ ## Installation commands for Ubuntu
38
+ - script: |
39
+ sudo apt-get install \
40
+ g++ \
41
+ git \
42
+ subversion \
43
+ automake \
44
+ libtool \
45
+ zlib1g-dev \
46
+ libicu-dev \
47
+ libboost-all-dev \
48
+ libssl-dev \
49
+ libbz2-dev \
50
+ liblzma-dev \
51
+ python-dev \
52
+ graphviz \
53
+ imagemagick \
54
+ make \
55
+ cmake \
56
+ libgoogle-perftools-dev \
57
+ autoconf \
58
+ doxygen
59
+ displayName: 'Install Ubuntu packages'
60
+
61
+ - script: |
62
+ wget "https://sourceforge.net/projects/cmph/files/v2.0.2/cmph-2.0.2.tar.gz/download"
63
+ mv download cmph-2.0.2.tar.gz
64
+ tar xvzf cmph-2.0.2.tar.gz
65
+ cd cmph-2.0.2
66
+ ./configure --prefix=$PWD
67
+ make
68
+ make install
69
+ cd ..
70
+ displayName: 'Build and Install cmph'
71
+
72
+ - script: |
73
+ wget "https://sourceforge.net/projects/xmlrpc-c/files/Xmlrpc-c%20Super%20Stable/1.51.06/xmlrpc-c-1.51.06.tgz/download"
74
+ mv download xmlrpc-c-1.51.06.tgz
75
+ tar xvzf xmlrpc-c-1.51.06.tgz
76
+ cd xmlrpc-c-1.51.06
77
+ ./configure --prefix=$PWD
78
+ make
79
+ make install
80
+ sudo ldconfig
81
+ cd ..
82
+ displayName: 'Build and Install xmlrpc-c'
83
+
84
+ - script: |
85
+ ./bjam \
86
+ --with-cmph=$PWD/cmph-2.0.2 \
87
+ --with-xmlrpc-c=$PWD/xmlrpc-c-1.51.06 \
88
+ -j2
89
+ displayName: 'Build Moses'
90
+
91
+ # - script: |
92
+ # ./bjam \
93
+ # -j2
94
+ # displayName: 'Build Moses'
95
+
96
+ # - task: ComponentGovernanceComponentDetection@0
97
+ # inputs:
98
+ # scanType: 'Register'
99
+ # verbosity: 'Verbose'
100
+ # alertWarningLevel: 'High'
mosesdecoder/biconcor/Jamfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
2
+ exe phrase-lookup : Vocabulary.cpp SuffixArray.cpp phrase-lookup.cpp ;
mosesdecoder/biconcor/PhrasePairCollection.cpp ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "PhrasePairCollection.h"
2
+
3
+ #include <cstdlib>
4
+ #include <cstring>
5
+ #include <algorithm>
6
+
7
+ #include "Vocabulary.h"
8
+ #include "SuffixArray.h"
9
+ #include "TargetCorpus.h"
10
+ #include "Alignment.h"
11
+ #include "PhrasePair.h"
12
+ #include "Mismatch.h"
13
+
14
+ using namespace std;
15
+
16
// Collects phrase-pair occurrences for a query phrase from a word-aligned
// parallel corpus (suffix array over the source side plus target corpus).
// The pointer arguments are stored, not copied, and are not deleted here.
PhrasePairCollection::PhrasePairCollection( SuffixArray *sa, TargetCorpus *tc, Alignment *a, int max_translation, int max_example )
  :m_suffixArray(sa)
  ,m_targetCorpus(tc)
  ,m_alignment(a)
  ,m_size(0)
  ,m_max_lookup(10000) // maximum number of source occurrences sampled
  ,m_max_translation(max_translation) // max number of different distinct translations returned
  ,m_max_example(max_example) // max number of examples returned for each distinct translation
{}

// NOTE(review): the destructor does not free the heap-allocated PhrasePair
// and Mismatch objects stored in m_collection / m_mismatch / m_unaligned —
// confirm whether ownership lies elsewhere or this is a tolerated leak.
PhrasePairCollection::~PhrasePairCollection()
{}
28
+
29
// Finds all occurrences of sourceString in the suffix array, extracts the
// aligned target phrase for each occurrence, and groups the resulting
// phrase pairs by identical target string into m_collection (sorted by
// group size, largest first). Occurrences without a consistent alignment
// are recorded in m_unaligned / m_mismatch instead.
// Returns the number of occurrences actually processed; when there are
// more than m_max_lookup matches, occurrences are sampled evenly.
int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
{
  INDEX first_match, last_match;
  if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {
    return 0; // query phrase does not occur in the corpus
  }
  //cerr << "\tfirst match " << first_match << endl;
  //cerr << "\tlast match " << last_match << endl;

  INDEX found = last_match - first_match +1;

  // maps a target word-id sequence to its group index in m_collection
  map< vector< WORD_ID >, INDEX > index;
  int real_count = 0;
  for( INDEX i=first_match; i<=last_match; i++ ) {
    int position = m_suffixArray->GetPosition( i );
    int source_start = m_suffixArray->GetWordInSentence( position );
    int source_end = source_start + sourceString.size()-1;
    INDEX sentence_id = m_suffixArray->GetSentence( position );
    int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
    int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
    //cerr << "match " << (i-first_match)
    //<< " in sentence " << sentence_id
    //<< ", starting at word " << source_start
    //<< " of " << sentence_length
    //<< ". target sentence has " << target_length << " words.";
    int target_start, target_end, pre_null, post_null;
    if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
      //cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
      //cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
      // Expansion of the target phrase by unaligned boundary words is
      // disabled: with this flag false only pre==0 / post==0 iterate.
      bool null_boundary_words = false;
      for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
        for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
          vector< WORD_ID > targetString;
          //cerr << "; ";
          for (int target = target_start - pre; target <= target_end + post; target++) {
            targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
            //cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";
          }
          // heap-allocated; stored raw in m_collection (see dtor note)
          PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);
          // matchCollection.Add( sentence_id, )
          if (index.find( targetString ) == index.end()) {
            // first time we see this target string: open a new group
            index[targetString] = m_collection.size();
            vector< PhrasePair* > emptyVector;
            m_collection.push_back( emptyVector );
          }
          m_collection[ index[targetString] ].push_back( phrasePair );
          m_size++;
        }
      }
    } else {
      //cerr << "mismatch " << (i-first_match)
      // << " in sentence " << sentence_id
      // << ", starting at word " << source_start
      // << " of " << sentence_length
      // << ". target sentence has " << target_length << " words.";
      Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
      if (mismatch->Unaligned())
        m_unaligned.push_back( mismatch );
      else
        m_mismatch.push_back( mismatch );
    }
    //cerr << endl;

    // too many matches: skip ahead so roughly m_max_lookup are sampled
    if (found > (INDEX)m_max_lookup) {
      i += found/m_max_lookup-1;
    }
    real_count++;
  }
  // largest translation groups first (CompareBySize: descending size)
  sort(m_collection.begin(), m_collection.end(), CompareBySize());
  return real_count;
}
100
+
101
// Prints up to m_max_translation distinct translations to stdout; for each,
// up to m_max_example example sentence pairs (sampled evenly over the group
// when it has more). `pretty` selects PrintPretty over plain Print.
void PhrasePairCollection::Print(bool pretty) const
{
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  int i=0;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {
    // target string header followed by its occurrence count
    (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
    int count = ppWithSameTarget->size();
    cout << "(" << count << ")" << endl;
    vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();
    // NOTE(review): `int j` is compared against the unsigned size() here —
    // harmless for realistic sizes but triggers sign-compare warnings.
    for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
      if (pretty) {
        (*p)->PrintPretty( &cout, 100 );
      } else {
        (*p)->Print( &cout );
      }
      // stride over the group so the examples are spread evenly
      if (ppWithSameTarget->size() > m_max_example) {
        p += ppWithSameTarget->size()/m_max_example-1;
      }
    }
  }
}
122
+
123
// Emits the collection as HTML to stdout: one table per distinct translation
// (first 10 rows shown, the rest behind a "(more)" toggle), a combined
// header once the groups degrade to singletons, and final tables for
// unaligned and mismatched occurrences.
void PhrasePairCollection::PrintHTML() const
{
  int pp_target = 0;
  bool singleton = false;   // set once we reach the first size-1 group
  // loop over all translations
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {

    int count = ppWithSameTarget->size();
    if (!singleton) {
      if (count == 1) {
        // m_collection is sorted by size, so every remaining group is a
        // singleton too; print one shared header for all of them
        singleton = true;
        cout << "<p class=\"pp_singleton_header\">singleton"
             << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
             << (m_collection.end() - ppWithSameTarget)
             << "/" << m_size << ")</p>";
      } else {
        cout << "<p class=\"pp_target_header\">";
        (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
        cout << " (" << count << "/" << m_size << ")" << endl;
        cout << "<p><div id=\"pp_" << pp_target << "\">";
      }
      cout << "<table align=\"center\">";
    }

    vector< PhrasePair* >::const_iterator p;
    // loop over all sentences where translation occurs
    int pp=0;
    int i=0;
    // first 10 rows, sampled evenly when the group exceeds m_max_example
    for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
      (*p)->PrintClippedHTML( &cout, 160 );
      if (count > m_max_example) {
        p += count/m_max_example-1;
        pp += count/m_max_example-1;
      }
    }
    if (i == 10 && pp < count) {
      // extended table
      cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
      cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
      cout << "<table align=\"center\">";
      // re-emit the group from the start, up to m_max_example rows
      for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
        (*p)->PrintClippedHTML( &cout, 160 );
        if (count > m_max_example) {
          p += count/m_max_example-1;
          pp += count/m_max_example-1;
        }
      }
    }
    if (!singleton) cout << "</table></div>\n";

    // after 10 non-singleton groups, hide the remainder behind a toggle
    if (!singleton && pp_target == 9) {
      cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
      cout << "<p class=\"pp_target_header\">(more)</p></div>";
      cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
    }
  }
  if (singleton) cout << "</table></div>\n";
  else if (pp_target > 9) cout << "</div>";

  size_t max_mismatch = m_max_example/3;
  // unaligned phrases
  if (m_unaligned.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">unaligned"
         << " (" << (m_unaligned.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    // sample at most max_mismatch rows, evenly spaced
    if (m_unaligned.size() > max_mismatch)
      step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_unaligned.size(); i+=step_size)
      m_unaligned[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }

  // mismatched phrases
  if (m_mismatch.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">mismatched"
         << " (" << (m_mismatch.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    if (m_mismatch.size() > max_mismatch)
      step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_mismatch.size(); i+=step_size)
      m_mismatch[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }
}
mosesdecoder/biconcor/PhrasePairCollection.h ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <string>
5
+
6
+ class Alignment;
7
+ class PhrasePair;
8
+ class SuffixArray;
9
+ class TargetCorpus;
10
+ class Mismatch;
11
+
12
// Collects and prints phrase-pair occurrences of a query phrase, using a
// suffix array over the source corpus, the target corpus, and the word
// alignment between them (see PhrasePairCollection.cpp).
class PhrasePairCollection
{
public:
  typedef unsigned int INDEX; // index type, shared with SuffixArray

private:
  SuffixArray *m_suffixArray;    // source-side suffix array (not deleted here)
  TargetCorpus *m_targetCorpus;  // target side of the parallel corpus (not deleted here)
  Alignment *m_alignment;        // word alignment (not deleted here)
  // phrase pairs grouped by identical target string
  std::vector<std::vector<PhrasePair*> > m_collection;
  // occurrences whose alignment is inconsistent / absent
  std::vector< Mismatch* > m_mismatch, m_unaligned;
  int m_size;             // total number of collected phrase pairs
  int m_max_lookup;       // cap on source occurrences examined
  int m_max_translation;  // cap on distinct translations printed
  int m_max_example;      // cap on examples printed per translation

  // No copying allowed.
  PhrasePairCollection(const PhrasePairCollection&);
  void operator=(const PhrasePairCollection&);

public:
  PhrasePairCollection ( SuffixArray *, TargetCorpus *, Alignment *, int, int );
  ~PhrasePairCollection ();

  // Gathers phrase pairs for the query; returns occurrences processed.
  int GetCollection( const std::vector<std::string >& sourceString );
  // Plain-text output; `pretty` selects the human-readable layout.
  void Print(bool pretty) const;
  // HTML output with collapsible per-translation tables.
  void PrintHTML() const;
};
40
+
41
// sorting helper: orders translation groups by descending size, so the
// most frequent translations come first after std::sort
struct CompareBySize {
  bool operator()(const std::vector<PhrasePair*>& a, const std::vector<PhrasePair*>& b ) const {
    return a.size() > b.size();
  }
};
mosesdecoder/biconcor/SuffixArray.h ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "Vocabulary.h"
4
+
5
// Suffix array over a tokenized corpus: supports phrase search and maps
// corpus positions back to their sentence (and, optionally, document).
class SuffixArray
{
public:
  typedef unsigned int INDEX;

private:
  WORD_ID *m_array;           // the corpus as a flat sequence of word ids
  INDEX *m_index;             // suffix array: positions in sorted suffix order
  INDEX *m_buffer;            // presumably scratch space for Sort — confirm in .cpp
  char *m_wordInSentence;     // position of each corpus word within its sentence
  INDEX *m_sentence;          // sentence id for each corpus position
  char *m_sentenceLength;     // per-sentence length (char — assumes <= 255 words; TODO confirm)
  WORD_ID m_endOfSentence;    // word id used as the sentence separator
  INDEX *m_document;          // document bookkeeping (only with UseDocument())
  INDEX *m_documentName;      // offsets of document names in m_documentNameBuffer
  char *m_documentNameBuffer; // 0-terminated document names, concatenated
  size_t m_documentNameLength;
  size_t m_documentCount;
  bool m_useDocument;         // whether document boundaries are tracked
  Vocabulary m_vcb;           // word <-> id mapping
  INDEX m_size;               // number of tokens in the corpus
  INDEX m_sentenceCount;

  // No copying allowed.
  SuffixArray(const SuffixArray&);
  void operator=(const SuffixArray&);

public:
  SuffixArray();
  ~SuffixArray();

  // Builds the array from a tokenized corpus file.
  void Create(const std::string& fileName );
  // Processes one document-boundary line while reading the corpus.
  bool ProcessDocumentLine( const char* const, const size_t );
  void Sort(INDEX start, INDEX end);
  int CompareIndex( INDEX a, INDEX b ) const;
  inline int CompareWord( WORD_ID a, WORD_ID b ) const;
  // Number of occurrences of the phrase in the corpus.
  int Count( const std::vector< WORD > &phrase );
  bool MinCount( const std::vector< WORD > &phrase, INDEX min );
  bool Exists( const std::vector< WORD > &phrase );
  // Finds the suffix-array range [firstMatch, lastMatch] matching the phrase.
  // NOTE: search_end defaults to -1, which wraps to the maximum INDEX value
  // (INDEX is unsigned) — an "until the end" sentinel.
  int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
  // NOTE(review): the defaults (search_start = -1, search_end = 0) look
  // swapped relative to FindMatches — confirm against the implementation.
  int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
  INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
  INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
  int Match( const std::vector< WORD > &phrase, INDEX index );
  void List( INDEX start, INDEX end );
  void PrintSentenceMatches( const std::vector< WORD > &phrase );
  // Corpus position of the index-th suffix in sorted order.
  inline INDEX GetPosition( INDEX index ) const {
    return m_index[ index ];
  }
  inline INDEX GetSentence( INDEX position ) const {
    return m_sentence[position];
  }
  inline char GetWordInSentence( INDEX position ) const {
    return m_wordInSentence[position];
  }
  inline char GetSentenceLength( INDEX sentenceId ) const {
    return m_sentenceLength[sentenceId];
  }
  inline INDEX GetSize() const {
    return m_size;
  }
  inline WORD GetWord( INDEX position ) const {
    return m_vcb.GetWord( m_array[position] );
  }
  // Enables document-boundary tracking (must be set before Create).
  void UseDocument() {
    m_useDocument = true;
  }
  INDEX GetDocument( INDEX sentence ) const;
  // Writes the 0-terminated name of the given document to stdout.
  void PrintDocumentName( INDEX document ) {
    for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
      std::cout << m_documentNameBuffer[ i ];
    }
  }
  void Save(const std::string& fileName ) const;
  void Load(const std::string& fileName );
  void CheckAllocation(bool, const char *dataStructure) const;
  bool Error( const char* message, const std::string& fileName) const;
};
mosesdecoder/biconcor/biconcor.cpp ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "SuffixArray.h"
2
+ #include "TargetCorpus.h"
3
+ #include "Alignment.h"
4
+ #include "PhrasePairCollection.h"
5
+ #include <getopt.h>
6
+ #include "base64.h"
7
+
8
+ using namespace std;
9
+
10
+ int main(int argc, char* argv[])
11
+ {
12
+ // handle parameters
13
+ string query;
14
+ string fileNameSuffix;
15
+ string fileNameSource;
16
+ string fileNameTarget = "";
17
+ string fileNameAlignment = "";
18
+ int loadFlag = false;
19
+ int saveFlag = false;
20
+ int createFlag = false;
21
+ int queryFlag = false;
22
+ int htmlFlag = false; // output as HTML
23
+ int prettyFlag = false; // output readable on screen
24
+ int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
25
+ int max_translation = 20;
26
+ int max_example = 50;
27
+ string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create source-corpus]\n\t[--query string]\n\t[--target target-corpus]\n\t[--alignment file]\n\t[--translations count]\n\t[--examples count]\n\t[--html]\n\t[--stdio]\n";
28
+ while(1) {
29
+ static struct option long_options[] = {
30
+ {"load", required_argument, 0, 'l'},
31
+ {"save", required_argument, 0, 's'},
32
+ {"create", required_argument, 0, 'c'},
33
+ {"query", required_argument, 0, 'q'},
34
+ {"target", required_argument, 0, 't'},
35
+ {"alignment", required_argument, 0, 'a'},
36
+ {"html", no_argument, 0, 'h'},
37
+ {"pretty", no_argument, 0, 'p'},
38
+ {"stdio", no_argument, 0, 'i'},
39
+ {"translations", required_argument, 0, 'o'},
40
+ {"examples", required_argument, 0, 'e'},
41
+ {0, 0, 0, 0}
42
+ };
43
+ int option_index = 0;
44
+ int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:hpio:e:", long_options, &option_index);
45
+ if (c == -1) break;
46
+ switch (c) {
47
+ case 'l':
48
+ fileNameSuffix = string(optarg);
49
+ loadFlag = true;
50
+ break;
51
+ case 't':
52
+ fileNameTarget = string(optarg);
53
+ break;
54
+ case 'a':
55
+ fileNameAlignment = string(optarg);
56
+ break;
57
+ case 's':
58
+ fileNameSuffix = string(optarg);
59
+ saveFlag = true;
60
+ break;
61
+ case 'c':
62
+ fileNameSource = string(optarg);
63
+ createFlag = true;
64
+ break;
65
+ case 'Q':
66
+ query = base64_decode(string(optarg));
67
+ queryFlag = true;
68
+ break;
69
+ case 'q':
70
+ query = string(optarg);
71
+ queryFlag = true;
72
+ break;
73
+ case 'o':
74
+ max_translation = atoi(optarg);
75
+ break;
76
+ case 'e':
77
+ max_example = atoi(optarg);
78
+ break;
79
+ case 'p':
80
+ prettyFlag = true;
81
+ break;
82
+ case 'h':
83
+ htmlFlag = true;
84
+ break;
85
+ case 'i':
86
+ stdioFlag = true;
87
+ break;
88
+ default:
89
+ cerr << info;
90
+ exit(1);
91
+ }
92
+ }
93
+ if (stdioFlag) {
94
+ queryFlag = true;
95
+ }
96
+
97
+ // check if parameter settings are legal
98
+ if (saveFlag && !createFlag) {
99
+ cerr << "error: cannot save without creating\n" << info;
100
+ exit(1);
101
+ }
102
+ if (saveFlag && loadFlag) {
103
+ cerr << "error: cannot load and save at the same time\n" << info;
104
+ exit(1);
105
+ }
106
+ if (!loadFlag && !createFlag) {
107
+ cerr << "error: neither load or create - i have no info!\n" << info;
108
+ exit(1);
109
+ }
110
+ if (createFlag && (fileNameTarget == "" || fileNameAlignment == "")) {
111
+ cerr << "error: i have no target corpus or alignment\n" << info;
112
+ exit(1);
113
+ }
114
+
115
+ // do your thing
116
+ SuffixArray suffixArray;
117
+ TargetCorpus targetCorpus;
118
+ Alignment alignment;
119
+ if (createFlag) {
120
+ cerr << "will create\n";
121
+ cerr << "source corpus is in " << fileNameSource << endl;
122
+ suffixArray.Create( fileNameSource );
123
+ cerr << "target corpus is in " << fileNameTarget << endl;
124
+ targetCorpus.Create( fileNameTarget );
125
+ cerr << "alignment is in " << fileNameAlignment << endl;
126
+ alignment.Create( fileNameAlignment );
127
+ if (saveFlag) {
128
+ suffixArray.Save( fileNameSuffix );
129
+ targetCorpus.Save( fileNameSuffix );
130
+ alignment.Save( fileNameSuffix );
131
+ cerr << "will save in " << fileNameSuffix << endl;
132
+ }
133
+ }
134
+ if (loadFlag) {
135
+ cerr << "will load from " << fileNameSuffix << endl;
136
+ suffixArray.Load( fileNameSuffix );
137
+ targetCorpus.Load( fileNameSuffix );
138
+ alignment.Load( fileNameSuffix );
139
+ }
140
+ if (stdioFlag) {
141
+ cout << "-|||- BICONCOR START -|||-" << endl << flush;
142
+ while(true) {
143
+ string query;
144
+ if (getline(cin, query, '\n').eof()) {
145
+ return 0;
146
+ }
147
+ vector< string > queryString = alignment.Tokenize( query.c_str() );
148
+ PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
149
+ int total = ppCollection.GetCollection( queryString );
150
+ cout << "TOTAL: " << total << endl;
151
+ if (htmlFlag) {
152
+ ppCollection.PrintHTML();
153
+ } else {
154
+ ppCollection.Print(prettyFlag);
155
+ }
156
+ cout << "-|||- BICONCOR END -|||-" << endl << flush;
157
+ }
158
+ } else if (queryFlag) {
159
+ cerr << "query is " << query << endl;
160
+ vector< string > queryString = alignment.Tokenize( query.c_str() );
161
+ PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
162
+ ppCollection.GetCollection( queryString );
163
+ if (htmlFlag) {
164
+ ppCollection.PrintHTML();
165
+ } else {
166
+ ppCollection.Print(prettyFlag);
167
+ }
168
+ }
169
+
170
+ return 0;
171
+ }
mosesdecoder/chk.tmp ADDED
@@ -0,0 +1 @@
 
 
1
+ test
mosesdecoder/doxygen.conf ADDED
@@ -0,0 +1,1781 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Doxyfile 1.7.6.1
2
+
3
+ # This file describes the settings to be used by the documentation system
4
+ # doxygen (www.doxygen.org) for a project.
5
+ #
6
+ # All text after a hash (#) is considered a comment and will be ignored.
7
+ # The format is:
8
+ # TAG = value [value, ...]
9
+ # For lists items can also be appended using:
10
+ # TAG += value [value, ...]
11
+ # Values that contain spaces should be placed between quotes (" ").
12
+
13
+ #---------------------------------------------------------------------------
14
+ # Project related configuration options
15
+ #---------------------------------------------------------------------------
16
+
17
+ # This tag specifies the encoding used for all characters in the config file
18
+ # that follow. The default is UTF-8 which is also the encoding used for all
19
+ # text before the first occurrence of this tag. Doxygen uses libiconv (or the
20
+ # iconv built into libc) for the transcoding. See
21
+ # http://www.gnu.org/software/libiconv for the list of possible encodings.
22
+
23
+ DOXYFILE_ENCODING = UTF-8
24
+
25
+ # The PROJECT_NAME tag is a single word (or sequence of words) that should
26
+ # identify the project. Note that if you do not use Doxywizard you need
27
+ # to put quotes around the project name if it contains spaces.
28
+
29
+ PROJECT_NAME = "Moses Decoder"
30
+
31
+ # The PROJECT_NUMBER tag can be used to enter a project or revision number.
32
+ # This could be handy for archiving the generated documentation or
33
+ # if some version control system is used.
34
+
35
+ PROJECT_NUMBER =
36
+
37
+ # Using the PROJECT_BRIEF tag one can provide an optional one line description
38
+ # for a project that appears at the top of each page and should give viewer
39
+ # a quick idea about the purpose of the project. Keep the description short.
40
+
41
+ PROJECT_BRIEF =
42
+
43
+ # With the PROJECT_LOGO tag one can specify a logo or icon that is
44
+ # included in the documentation. The maximum height of the logo should not
45
+ # exceed 55 pixels and the maximum width should not exceed 200 pixels.
46
+ # Doxygen will copy the logo to the output directory.
47
+
48
+ PROJECT_LOGO =
49
+
50
+ # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
51
+ # base path where the generated documentation will be put.
52
+ # If a relative path is entered, it will be relative to the location
53
+ # where doxygen was started. If left blank the current directory will be used.
54
+
55
+ OUTPUT_DIRECTORY = doxy
56
+
57
+ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
58
+ # 4096 sub-directories (in 2 levels) under the output directory of each output
59
+ # format and will distribute the generated files over these directories.
60
+ # Enabling this option can be useful when feeding doxygen a huge amount of
61
+ # source files, where putting all generated files in the same directory would
62
+ # otherwise cause performance problems for the file system.
63
+
64
+ CREATE_SUBDIRS = NO
65
+
66
+ # The OUTPUT_LANGUAGE tag is used to specify the language in which all
67
+ # documentation generated by doxygen is written. Doxygen will use this
68
+ # information to generate all constant output in the proper language.
69
+ # The default language is English, other supported languages are:
70
+ # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
71
+ # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
72
+ # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
73
+ # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
74
+ # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
75
+ # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
76
+
77
+ OUTPUT_LANGUAGE = English
78
+
79
+ # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
80
+ # include brief member descriptions after the members that are listed in
81
+ # the file and class documentation (similar to JavaDoc).
82
+ # Set to NO to disable this.
83
+
84
+ BRIEF_MEMBER_DESC = YES
85
+
86
+ # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
87
+ # the brief description of a member or function before the detailed description.
88
+ # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
89
+ # brief descriptions will be completely suppressed.
90
+
91
+ REPEAT_BRIEF = YES
92
+
93
+ # This tag implements a quasi-intelligent brief description abbreviator
94
+ # that is used to form the text in various listings. Each string
95
+ # in this list, if found as the leading text of the brief description, will be
96
+ # stripped from the text and the result after processing the whole list, is
97
+ # used as the annotated text. Otherwise, the brief description is used as-is.
98
+ # If left blank, the following values are used ("$name" is automatically
99
+ # replaced with the name of the entity): "The $name class" "The $name widget"
100
+ # "The $name file" "is" "provides" "specifies" "contains"
101
+ # "represents" "a" "an" "the"
102
+
103
+ ABBREVIATE_BRIEF =
104
+
105
+ # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
106
+ # Doxygen will generate a detailed section even if there is only a brief
107
+ # description.
108
+
109
+ ALWAYS_DETAILED_SEC = NO
110
+
111
+ # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
112
+ # inherited members of a class in the documentation of that class as if those
113
+ # members were ordinary class members. Constructors, destructors and assignment
114
+ # operators of the base classes will not be shown.
115
+
116
+ INLINE_INHERITED_MEMB = NO
117
+
118
+ # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
119
+ # path before files name in the file list and in the header files. If set
120
+ # to NO the shortest path that makes the file name unique will be used.
121
+
122
+ FULL_PATH_NAMES = YES
123
+
124
+ # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
125
+ # can be used to strip a user-defined part of the path. Stripping is
126
+ # only done if one of the specified strings matches the left-hand part of
127
+ # the path. The tag can be used to show relative paths in the file list.
128
+ # If left blank the directory from which doxygen is run is used as the
129
+ # path to strip.
130
+
131
+ STRIP_FROM_PATH =
132
+
133
+ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
134
+ # the path mentioned in the documentation of a class, which tells
135
+ # the reader which header file to include in order to use a class.
136
+ # If left blank only the name of the header file containing the class
137
+ # definition is used. Otherwise one should specify the include paths that
138
+ # are normally passed to the compiler using the -I flag.
139
+
140
+ STRIP_FROM_INC_PATH =
141
+
142
+ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
143
+ # (but less readable) file names. This can be useful if your file system
144
+ # doesn't support long names like on DOS, Mac, or CD-ROM.
145
+
146
+ SHORT_NAMES = NO
147
+
148
+ # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
149
+ # will interpret the first line (until the first dot) of a JavaDoc-style
150
+ # comment as the brief description. If set to NO, the JavaDoc
151
+ # comments will behave just like regular Qt-style comments
152
+ # (thus requiring an explicit @brief command for a brief description.)
153
+
154
+ JAVADOC_AUTOBRIEF = NO
155
+
156
+ # If the QT_AUTOBRIEF tag is set to YES then Doxygen will
157
+ # interpret the first line (until the first dot) of a Qt-style
158
+ # comment as the brief description. If set to NO, the comments
159
+ # will behave just like regular Qt-style comments (thus requiring
160
+ # an explicit \brief command for a brief description.)
161
+
162
+ QT_AUTOBRIEF = NO
163
+
164
+ # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
165
+ # treat a multi-line C++ special comment block (i.e. a block of //! or ///
166
+ # comments) as a brief description. This used to be the default behaviour.
167
+ # The new default is to treat a multi-line C++ comment block as a detailed
168
+ # description. Set this tag to YES if you prefer the old behaviour instead.
169
+
170
+ MULTILINE_CPP_IS_BRIEF = NO
171
+
172
+ # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
173
+ # member inherits the documentation from any documented member that it
174
+ # re-implements.
175
+
176
+ INHERIT_DOCS = YES
177
+
178
+ # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
179
+ # a new page for each member. If set to NO, the documentation of a member will
180
+ # be part of the file/class/namespace that contains it.
181
+
182
+ SEPARATE_MEMBER_PAGES = NO
183
+
184
+ # The TAB_SIZE tag can be used to set the number of spaces in a tab.
185
+ # Doxygen uses this value to replace tabs by spaces in code fragments.
186
+
187
+ TAB_SIZE = 8
188
+
189
+ # This tag can be used to specify a number of aliases that acts
190
+ # as commands in the documentation. An alias has the form "name=value".
191
+ # For example adding "sideeffect=\par Side Effects:\n" will allow you to
192
+ # put the command \sideeffect (or @sideeffect) in the documentation, which
193
+ # will result in a user-defined paragraph with heading "Side Effects:".
194
+ # You can put \n's in the value part of an alias to insert newlines.
195
+
196
+ ALIASES =
197
+
198
+ # This tag can be used to specify a number of word-keyword mappings (TCL only).
199
+ # A mapping has the form "name=value". For example adding
200
+ # "class=itcl::class" will allow you to use the command class in the
201
+ # itcl::class meaning.
202
+
203
+ TCL_SUBST =
204
+
205
+ # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
206
+ # sources only. Doxygen will then generate output that is more tailored for C.
207
+ # For instance, some of the names that are used will be different. The list
208
+ # of all members will be omitted, etc.
209
+
210
+ OPTIMIZE_OUTPUT_FOR_C = NO
211
+
212
+ # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
213
+ # sources only. Doxygen will then generate output that is more tailored for
214
+ # Java. For instance, namespaces will be presented as packages, qualified
215
+ # scopes will look different, etc.
216
+
217
+ OPTIMIZE_OUTPUT_JAVA = NO
218
+
219
+ # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
220
+ # sources only. Doxygen will then generate output that is more tailored for
221
+ # Fortran.
222
+
223
+ OPTIMIZE_FOR_FORTRAN = NO
224
+
225
+ # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
226
+ # sources. Doxygen will then generate output that is tailored for
227
+ # VHDL.
228
+
229
+ OPTIMIZE_OUTPUT_VHDL = NO
230
+
231
+ # Doxygen selects the parser to use depending on the extension of the files it
232
+ # parses. With this tag you can assign which parser to use for a given extension.
233
+ # Doxygen has a built-in mapping, but you can override or extend it using this
234
+ # tag. The format is ext=language, where ext is a file extension, and language
235
+ # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
236
+ # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
237
+ # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
238
+ # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
239
+ # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
240
+
241
+ EXTENSION_MAPPING =
242
+
243
+ # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
244
+ # to include (a tag file for) the STL sources as input, then you should
245
+ # set this tag to YES in order to let doxygen match functions declarations and
246
+ # definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
247
+ # func(std::string) {}). This also makes the inheritance and collaboration
248
+ # diagrams that involve STL classes more complete and accurate.
249
+
250
+ BUILTIN_STL_SUPPORT = NO
251
+
252
+ # If you use Microsoft's C++/CLI language, you should set this option to YES to
253
+ # enable parsing support.
254
+
255
+ CPP_CLI_SUPPORT = NO
256
+
257
+ # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
258
+ # Doxygen will parse them like normal C++ but will assume all classes use public
259
+ # instead of private inheritance when no explicit protection keyword is present.
260
+
261
+ SIP_SUPPORT = NO
262
+
263
+ # For Microsoft's IDL there are propget and propput attributes to indicate getter
264
+ # and setter methods for a property. Setting this option to YES (the default)
265
+ # will make doxygen replace the get and set methods by a property in the
266
+ # documentation. This will only work if the methods are indeed getting or
267
+ # setting a simple type. If this is not the case, or you want to show the
268
+ # methods anyway, you should set this option to NO.
269
+
270
+ IDL_PROPERTY_SUPPORT = YES
271
+
272
+ # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
273
+ # tag is set to YES, then doxygen will reuse the documentation of the first
274
+ # member in the group (if any) for the other members of the group. By default
275
+ # all members of a group must be documented explicitly.
276
+
277
+ DISTRIBUTE_GROUP_DOC = NO
278
+
279
+ # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
280
+ # the same type (for instance a group of public functions) to be put as a
281
+ # subgroup of that type (e.g. under the Public Functions section). Set it to
282
+ # NO to prevent subgrouping. Alternatively, this can be done per class using
283
+ # the \nosubgrouping command.
284
+
285
+ SUBGROUPING = YES
286
+
287
+ # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
288
+ # unions are shown inside the group in which they are included (e.g. using
289
+ # @ingroup) instead of on a separate page (for HTML and Man pages) or
290
+ # section (for LaTeX and RTF).
291
+
292
+ INLINE_GROUPED_CLASSES = NO
293
+
294
+ # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
295
+ # unions with only public data fields will be shown inline in the documentation
296
+ # of the scope in which they are defined (i.e. file, namespace, or group
297
+ # documentation), provided this scope is documented. If set to NO (the default),
298
+ # structs, classes, and unions are shown on a separate page (for HTML and Man
299
+ # pages) or section (for LaTeX and RTF).
300
+
301
+ INLINE_SIMPLE_STRUCTS = NO
302
+
303
+ # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
304
+ # is documented as struct, union, or enum with the name of the typedef. So
305
+ # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
306
+ # with name TypeT. When disabled the typedef will appear as a member of a file,
307
+ # namespace, or class. And the struct will be named TypeS. This can typically
308
+ # be useful for C code in case the coding convention dictates that all compound
309
+ # types are typedef'ed and only the typedef is referenced, never the tag name.
310
+
311
+ TYPEDEF_HIDES_STRUCT = NO
312
+
313
+ # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
314
+ # determine which symbols to keep in memory and which to flush to disk.
315
+ # When the cache is full, less often used symbols will be written to disk.
316
+ # For small to medium size projects (<1000 input files) the default value is
317
+ # probably good enough. For larger projects a too small cache size can cause
318
+ # doxygen to be busy swapping symbols to and from disk most of the time
319
+ # causing a significant performance penalty.
320
+ # If the system has enough physical memory increasing the cache will improve the
321
+ # performance by keeping more symbols in memory. Note that the value works on
322
+ # a logarithmic scale so increasing the size by one will roughly double the
323
+ # memory usage. The cache size is given by this formula:
324
+ # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
325
+ # corresponding to a cache size of 2^16 = 65536 symbols.
326
+
327
+ SYMBOL_CACHE_SIZE = 0
328
+
329
+ # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
330
+ # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
331
+ # their name and scope. Since this can be an expensive process and often the
332
+ # same symbol appear multiple times in the code, doxygen keeps a cache of
333
+ # pre-resolved symbols. If the cache is too small doxygen will become slower.
334
+ # If the cache is too large, memory is wasted. The cache size is given by this
335
+ # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
336
+ # corresponding to a cache size of 2^16 = 65536 symbols.
337
+
338
+ LOOKUP_CACHE_SIZE = 0
339
+
340
+ #---------------------------------------------------------------------------
341
+ # Build related configuration options
342
+ #---------------------------------------------------------------------------
343
+
344
+ # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
345
+ # documentation are documented, even if no documentation was available.
346
+ # Private class members and static file members will be hidden unless
347
+ # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
348
+
349
+ EXTRACT_ALL = YES
350
+
351
+ # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
352
+ # will be included in the documentation.
353
+
354
+ EXTRACT_PRIVATE = YES
355
+
356
+ # If the EXTRACT_STATIC tag is set to YES all static members of a file
357
+ # will be included in the documentation.
358
+
359
+ EXTRACT_STATIC = YES
360
+
361
+ # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
362
+ # defined locally in source files will be included in the documentation.
363
+ # If set to NO only classes defined in header files are included.
364
+
365
+ EXTRACT_LOCAL_CLASSES = NO
366
+
367
+ # This flag is only useful for Objective-C code. When set to YES local
368
+ # methods, which are defined in the implementation section but not in
369
+ # the interface are included in the documentation.
370
+ # If set to NO (the default) only methods in the interface are included.
371
+
372
+ EXTRACT_LOCAL_METHODS = NO
373
+
374
+ # If this flag is set to YES, the members of anonymous namespaces will be
375
+ # extracted and appear in the documentation as a namespace called
376
+ # 'anonymous_namespace{file}', where file will be replaced with the base
377
+ # name of the file that contains the anonymous namespace. By default
378
+ # anonymous namespaces are hidden.
379
+
380
+ EXTRACT_ANON_NSPACES = NO
381
+
382
+ # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
383
+ # undocumented members of documented classes, files or namespaces.
384
+ # If set to NO (the default) these members will be included in the
385
+ # various overviews, but no documentation section is generated.
386
+ # This option has no effect if EXTRACT_ALL is enabled.
387
+
388
+ HIDE_UNDOC_MEMBERS = NO
389
+
390
+ # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
391
+ # undocumented classes that are normally visible in the class hierarchy.
392
+ # If set to NO (the default) these classes will be included in the various
393
+ # overviews. This option has no effect if EXTRACT_ALL is enabled.
394
+
395
+ HIDE_UNDOC_CLASSES = NO
396
+
397
+ # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
398
+ # friend (class|struct|union) declarations.
399
+ # If set to NO (the default) these declarations will be included in the
400
+ # documentation.
401
+
402
+ HIDE_FRIEND_COMPOUNDS = NO
403
+
404
+ # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
405
+ # documentation blocks found inside the body of a function.
406
+ # If set to NO (the default) these blocks will be appended to the
407
+ # function's detailed documentation block.
408
+
409
+ HIDE_IN_BODY_DOCS = NO
410
+
411
+ # The INTERNAL_DOCS tag determines if documentation
412
+ # that is typed after a \internal command is included. If the tag is set
413
+ # to NO (the default) then the documentation will be excluded.
414
+ # Set it to YES to include the internal documentation.
415
+
416
+ INTERNAL_DOCS = NO
417
+
418
+ # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
419
+ # file names in lower-case letters. If set to YES upper-case letters are also
420
+ # allowed. This is useful if you have classes or files whose names only differ
421
+ # in case and if your file system supports case sensitive file names. Windows
422
+ # and Mac users are advised to set this option to NO.
423
+
424
+ CASE_SENSE_NAMES = YES
425
+
426
+ # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
427
+ # will show members with their full class and namespace scopes in the
428
+ # documentation. If set to YES the scope will be hidden.
429
+
430
+ HIDE_SCOPE_NAMES = NO
431
+
432
+ # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
433
+ # will put a list of the files that are included by a file in the documentation
434
+ # of that file.
435
+
436
+ SHOW_INCLUDE_FILES = YES
437
+
438
+ # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
439
+ # will list include files with double quotes in the documentation
440
+ # rather than with sharp brackets.
441
+
442
+ FORCE_LOCAL_INCLUDES = NO
443
+
444
+ # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
445
+ # is inserted in the documentation for inline members.
446
+
447
+ INLINE_INFO = YES
448
+
449
+ # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
450
+ # will sort the (detailed) documentation of file and class members
451
+ # alphabetically by member name. If set to NO the members will appear in
452
+ # declaration order.
453
+
454
+ SORT_MEMBER_DOCS = YES
455
+
456
+ # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
457
+ # brief documentation of file, namespace and class members alphabetically
458
+ # by member name. If set to NO (the default) the members will appear in
459
+ # declaration order.
460
+
461
+ SORT_BRIEF_DOCS = NO
462
+
463
+ # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
464
+ # will sort the (brief and detailed) documentation of class members so that
465
+ # constructors and destructors are listed first. If set to NO (the default)
466
+ # the constructors will appear in the respective orders defined by
467
+ # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
468
+ # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
469
+ # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
470
+
471
+ SORT_MEMBERS_CTORS_1ST = NO
472
+
473
+ # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
474
+ # hierarchy of group names into alphabetical order. If set to NO (the default)
475
+ # the group names will appear in their defined order.
476
+
477
+ SORT_GROUP_NAMES = NO
478
+
479
+ # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
480
+ # sorted by fully-qualified names, including namespaces. If set to
481
+ # NO (the default), the class list will be sorted only by class name,
482
+ # not including the namespace part.
483
+ # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
484
+ # Note: This option applies only to the class list, not to the
485
+ # alphabetical list.
486
+
487
+ SORT_BY_SCOPE_NAME = NO
488
+
489
+ # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
490
+ # do proper type resolution of all parameters of a function it will reject a
491
+ # match between the prototype and the implementation of a member function even
492
+ # if there is only one candidate or it is obvious which candidate to choose
493
+ # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
494
+ # will still accept a match between prototype and implementation in such cases.
495
+
496
+ STRICT_PROTO_MATCHING = NO
497
+
498
+ # The GENERATE_TODOLIST tag can be used to enable (YES) or
499
+ # disable (NO) the todo list. This list is created by putting \todo
500
+ # commands in the documentation.
501
+
502
+ GENERATE_TODOLIST = YES
503
+
504
+ # The GENERATE_TESTLIST tag can be used to enable (YES) or
505
+ # disable (NO) the test list. This list is created by putting \test
506
+ # commands in the documentation.
507
+
508
+ GENERATE_TESTLIST = YES
509
+
510
+ # The GENERATE_BUGLIST tag can be used to enable (YES) or
511
+ # disable (NO) the bug list. This list is created by putting \bug
512
+ # commands in the documentation.
513
+
514
+ GENERATE_BUGLIST = YES
515
+
516
+ # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
517
+ # disable (NO) the deprecated list. This list is created by putting
518
+ # \deprecated commands in the documentation.
519
+
520
+ GENERATE_DEPRECATEDLIST= YES
521
+
522
+ # The ENABLED_SECTIONS tag can be used to enable conditional
523
+ # documentation sections, marked by \if sectionname ... \endif.
524
+
525
+ ENABLED_SECTIONS =
526
+
527
+ # The MAX_INITIALIZER_LINES tag determines the maximum number of lines
528
+ # the initial value of a variable or macro consists of for it to appear in
529
+ # the documentation. If the initializer consists of more lines than specified
530
+ # here it will be hidden. Use a value of 0 to hide initializers completely.
531
+ # The appearance of the initializer of individual variables and macros in the
532
+ # documentation can be controlled using \showinitializer or \hideinitializer
533
+ # command in the documentation regardless of this setting.
534
+
535
+ MAX_INITIALIZER_LINES = 30
536
+
537
+ # Set the SHOW_USED_FILES tag to NO to disable the list of files generated
538
+ # at the bottom of the documentation of classes and structs. If set to YES the
539
+ # list will mention the files that were used to generate the documentation.
540
+
541
+ SHOW_USED_FILES = YES
542
+
543
+ # If the sources in your project are distributed over multiple directories
544
+ # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
545
+ # in the documentation. The default is NO.
546
+
547
+ SHOW_DIRECTORIES = NO
548
+
549
+ # Set the SHOW_FILES tag to NO to disable the generation of the Files page.
550
+ # This will remove the Files entry from the Quick Index and from the
551
+ # Folder Tree View (if specified). The default is YES.
552
+
553
+ SHOW_FILES = YES
554
+
555
+ # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
556
+ # Namespaces page.
557
+ # This will remove the Namespaces entry from the Quick Index
558
+ # and from the Folder Tree View (if specified). The default is YES.
559
+
560
+ SHOW_NAMESPACES = YES
561
+
562
+ # The FILE_VERSION_FILTER tag can be used to specify a program or script that
563
+ # doxygen should invoke to get the current version for each file (typically from
564
+ # the version control system). Doxygen will invoke the program by executing (via
565
+ # popen()) the command <command> <input-file>, where <command> is the value of
566
+ # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
567
+ # provided by doxygen. Whatever the program writes to standard output
568
+ # is used as the file version. See the manual for examples.
569
+
570
+ FILE_VERSION_FILTER =
571
+
572
+ # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
573
+ # by doxygen. The layout file controls the global structure of the generated
574
+ # output files in an output format independent way. To create the layout file
575
+ # that represents doxygen's defaults, run doxygen with the -l option.
576
+ # You can optionally specify a file name after the option, if omitted
577
+ # DoxygenLayout.xml will be used as the name of the layout file.
578
+
579
+ LAYOUT_FILE =
580
+
581
+ # The CITE_BIB_FILES tag can be used to specify one or more bib files
582
+ # containing the references data. This must be a list of .bib files. The
583
+ # .bib extension is automatically appended if omitted. Using this command
584
+ # requires the bibtex tool to be installed. See also
585
+ # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
586
+ # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
587
+ # feature you need bibtex and perl available in the search path.
588
+
589
+ CITE_BIB_FILES =
590
+
591
+ #---------------------------------------------------------------------------
592
+ # configuration options related to warning and progress messages
593
+ #---------------------------------------------------------------------------
594
+
595
+ # The QUIET tag can be used to turn on/off the messages that are generated
596
+ # by doxygen. Possible values are YES and NO. If left blank NO is used.
597
+
598
+ QUIET = NO
599
+
600
+ # The WARNINGS tag can be used to turn on/off the warning messages that are
601
+ # generated by doxygen. Possible values are YES and NO. If left blank
602
+ # NO is used.
603
+
604
+ WARNINGS = YES
605
+
606
+ # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
607
+ # for undocumented members. If EXTRACT_ALL is set to YES then this flag will
608
+ # automatically be disabled.
609
+
610
+ WARN_IF_UNDOCUMENTED = YES
611
+
612
+ # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
613
+ # potential errors in the documentation, such as not documenting some
614
+ # parameters in a documented function, or documenting parameters that
615
+ # don't exist or using markup commands wrongly.
616
+
617
+ WARN_IF_DOC_ERROR = YES
618
+
619
+ # The WARN_NO_PARAMDOC option can be enabled to get warnings for
620
+ # functions that are documented, but have no documentation for their parameters
621
+ # or return value. If set to NO (the default) doxygen will only warn about
622
+ # wrong or incomplete parameter documentation, but not about the absence of
623
+ # documentation.
624
+
625
+ WARN_NO_PARAMDOC = NO
626
+
627
+ # The WARN_FORMAT tag determines the format of the warning messages that
628
+ # doxygen can produce. The string should contain the $file, $line, and $text
629
+ # tags, which will be replaced by the file and line number from which the
630
+ # warning originated and the warning text. Optionally the format may contain
631
+ # $version, which will be replaced by the version of the file (if it could
632
+ # be obtained via FILE_VERSION_FILTER)
633
+
634
+ WARN_FORMAT = "$file:$line: $text"
635
+
636
+ # The WARN_LOGFILE tag can be used to specify a file to which warning
637
+ # and error messages should be written. If left blank the output is written
638
+ # to stderr.
639
+
640
+ WARN_LOGFILE =
641
+
642
+ #---------------------------------------------------------------------------
643
+ # configuration options related to the input files
644
+ #---------------------------------------------------------------------------
645
+
646
+ # The INPUT tag can be used to specify the files and/or directories that contain
647
+ # documented source files. You may enter file names like "myfile.cpp" or
648
+ # directories like "/usr/src/myproject". Separate the files or directories
649
+ # with spaces.
650
+
651
+ INPUT = moses
652
+
653
+ # This tag can be used to specify the character encoding of the source files
654
+ # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
655
+ # also the default input encoding. Doxygen uses libiconv (or the iconv built
656
+ # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
657
+ # the list of possible encodings.
658
+
659
+ INPUT_ENCODING = UTF-8
660
+
661
+ # If the value of the INPUT tag contains directories, you can use the
662
+ # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
663
+ # and *.h) to filter out the source-files in the directories. If left
664
+ # blank the following patterns are tested:
665
+ # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
666
+ # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
667
+ # *.f90 *.f *.for *.vhd *.vhdl
668
+
669
+ FILE_PATTERNS =
670
+
671
+ # The RECURSIVE tag can be used to specify whether or not subdirectories
672
+ # should be searched for input files as well. Possible values are YES and NO.
673
+ # If left blank NO is used.
674
+
675
+ RECURSIVE = YES
676
+
677
+ # The EXCLUDE tag can be used to specify files and/or directories that should be
678
+ # excluded from the INPUT source files. This way you can easily exclude a
679
+ # subdirectory from a directory tree whose root is specified with the INPUT tag.
680
+ # Note that relative paths are relative to the directory from which doxygen is
681
+ # run.
682
+
683
+ EXCLUDE = opt regtest doxy
684
+
685
+ # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
686
+ # directories that are symbolic links (a Unix file system feature) are excluded
687
+ # from the input.
688
+
689
+ EXCLUDE_SYMLINKS = NO
690
+
691
+ # If the value of the INPUT tag contains directories, you can use the
692
+ # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
693
+ # certain files from those directories. Note that the wildcards are matched
694
+ # against the file with absolute path, so to exclude all test directories
695
+ # for example use the pattern */test/*
696
+
697
+ EXCLUDE_PATTERNS = opt/* regtest/* doxy/*
698
+
699
+ # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
700
+ # (namespaces, classes, functions, etc.) that should be excluded from the
701
+ # output. The symbol name can be a fully qualified name, a word, or if the
702
+ # wildcard * is used, a substring. Examples: ANamespace, AClass,
703
+ # AClass::ANamespace, ANamespace::*Test
704
+
705
+ EXCLUDE_SYMBOLS =
706
+
707
+ # The EXAMPLE_PATH tag can be used to specify one or more files or
708
+ # directories that contain example code fragments that are included (see
709
+ # the \include command).
710
+
711
+ EXAMPLE_PATH =
712
+
713
+ # If the value of the EXAMPLE_PATH tag contains directories, you can use the
714
+ # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
715
+ # and *.h) to filter out the source-files in the directories. If left
716
+ # blank all files are included.
717
+
718
+ EXAMPLE_PATTERNS =
719
+
720
+ # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
721
+ # searched for input files to be used with the \include or \dontinclude
722
+ # commands irrespective of the value of the RECURSIVE tag.
723
+ # Possible values are YES and NO. If left blank NO is used.
724
+
725
+ EXAMPLE_RECURSIVE = NO
726
+
727
+ # The IMAGE_PATH tag can be used to specify one or more files or
728
+ # directories that contain image that are included in the documentation (see
729
+ # the \image command).
730
+
731
+ IMAGE_PATH =
732
+
733
+ # The INPUT_FILTER tag can be used to specify a program that doxygen should
734
+ # invoke to filter for each input file. Doxygen will invoke the filter program
735
+ # by executing (via popen()) the command <filter> <input-file>, where <filter>
736
+ # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
737
+ # input file. Doxygen will then use the output that the filter program writes
738
+ # to standard output.
739
+ # If FILTER_PATTERNS is specified, this tag will be
740
+ # ignored.
741
+
742
+ INPUT_FILTER =
743
+
744
+ # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
745
+ # basis.
746
+ # Doxygen will compare the file name with each pattern and apply the
747
+ # filter if there is a match.
748
+ # The filters are a list of the form:
749
+ # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
750
+ # info on how filters are used. If FILTER_PATTERNS is empty or if
751
+ # none of the patterns match the file name, INPUT_FILTER is applied.
752
+
753
+ FILTER_PATTERNS =
754
+
755
+ # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
756
+ # INPUT_FILTER) will be used to filter the input files when producing source
757
+ # files to browse (i.e. when SOURCE_BROWSER is set to YES).
758
+
759
+ FILTER_SOURCE_FILES = NO
760
+
761
+ # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
762
+ # pattern. A pattern will override the setting for FILTER_PATTERN (if any)
763
+ # and it is also possible to disable source filtering for a specific pattern
764
+ # using *.ext= (so without naming a filter). This option only has effect when
765
+ # FILTER_SOURCE_FILES is enabled.
766
+
767
+ FILTER_SOURCE_PATTERNS =
768
+
769
+ #---------------------------------------------------------------------------
770
+ # configuration options related to source browsing
771
+ #---------------------------------------------------------------------------
772
+
773
+ # If the SOURCE_BROWSER tag is set to YES then a list of source files will
774
+ # be generated. Documented entities will be cross-referenced with these sources.
775
+ # Note: To get rid of all source code in the generated output, make sure also
776
+ # VERBATIM_HEADERS is set to NO.
777
+
778
+ SOURCE_BROWSER = NO
779
+
780
+ # Setting the INLINE_SOURCES tag to YES will include the body
781
+ # of functions and classes directly in the documentation.
782
+
783
+ INLINE_SOURCES = NO
784
+
785
+ # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
786
+ # doxygen to hide any special comment blocks from generated source code
787
+ # fragments. Normal C and C++ comments will always remain visible.
788
+
789
+ STRIP_CODE_COMMENTS = YES
790
+
791
+ # If the REFERENCED_BY_RELATION tag is set to YES
792
+ # then for each documented function all documented
793
+ # functions referencing it will be listed.
794
+
795
+ REFERENCED_BY_RELATION = NO
796
+
797
+ # If the REFERENCES_RELATION tag is set to YES
798
+ # then for each documented function all documented entities
799
+ # called/used by that function will be listed.
800
+
801
+ REFERENCES_RELATION = NO
802
+
803
+ # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
804
+ # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
805
+ # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
806
+ # link to the source code.
807
+ # Otherwise they will link to the documentation.
808
+
809
+ REFERENCES_LINK_SOURCE = YES
810
+
811
+ # If the USE_HTAGS tag is set to YES then the references to source code
812
+ # will point to the HTML generated by the htags(1) tool instead of doxygen
813
+ # built-in source browser. The htags tool is part of GNU's global source
814
+ # tagging system (see http://www.gnu.org/software/global/global.html). You
815
+ # will need version 4.8.6 or higher.
816
+
817
+ USE_HTAGS = NO
818
+
819
+ # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
820
+ # will generate a verbatim copy of the header file for each class for
821
+ # which an include is specified. Set to NO to disable this.
822
+
823
+ VERBATIM_HEADERS = YES
824
+
825
+ #---------------------------------------------------------------------------
826
+ # configuration options related to the alphabetical class index
827
+ #---------------------------------------------------------------------------
828
+
829
+ # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
830
+ # of all compounds will be generated. Enable this if the project
831
+ # contains a lot of classes, structs, unions or interfaces.
832
+
833
+ ALPHABETICAL_INDEX = YES
834
+
835
+ # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
836
+ # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
837
+ # in which this list will be split (can be a number in the range [1..20])
838
+
839
+ COLS_IN_ALPHA_INDEX = 5
840
+
841
+ # In case all classes in a project start with a common prefix, all
842
+ # classes will be put under the same header in the alphabetical index.
843
+ # The IGNORE_PREFIX tag can be used to specify one or more prefixes that
844
+ # should be ignored while generating the index headers.
845
+
846
+ IGNORE_PREFIX =
847
+
848
+ #---------------------------------------------------------------------------
849
+ # configuration options related to the HTML output
850
+ #---------------------------------------------------------------------------
851
+
852
+ # If the GENERATE_HTML tag is set to YES (the default) Doxygen will
853
+ # generate HTML output.
854
+
855
+ GENERATE_HTML = YES
856
+
857
+ # The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
858
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
859
+ # put in front of it. If left blank `html' will be used as the default path.
860
+
861
+ HTML_OUTPUT = html
862
+
863
+ # The HTML_FILE_EXTENSION tag can be used to specify the file extension for
864
+ # each generated HTML page (for example: .htm,.php,.asp). If it is left blank
865
+ # doxygen will generate files with .html extension.
866
+
867
+ HTML_FILE_EXTENSION = .html
868
+
869
+ # The HTML_HEADER tag can be used to specify a personal HTML header for
870
+ # each generated HTML page. If it is left blank doxygen will generate a
871
+ # standard header. Note that when using a custom header you are responsible
872
+ # for the proper inclusion of any scripts and style sheets that doxygen
873
+ # needs, which is dependent on the configuration options used.
874
+ # It is advised to generate a default header using "doxygen -w html
875
+ # header.html footer.html stylesheet.css YourConfigFile" and then modify
876
+ # that header. Note that the header is subject to change so you typically
877
+ # have to redo this when upgrading to a newer version of doxygen or when
878
+ # changing the value of configuration settings such as GENERATE_TREEVIEW!
879
+
880
+ HTML_HEADER =
881
+
882
+ # The HTML_FOOTER tag can be used to specify a personal HTML footer for
883
+ # each generated HTML page. If it is left blank doxygen will generate a
884
+ # standard footer.
885
+
886
+ HTML_FOOTER =
887
+
888
+ # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
889
+ # style sheet that is used by each HTML page. It can be used to
890
+ # fine-tune the look of the HTML output. If the tag is left blank doxygen
891
+ # will generate a default style sheet. Note that doxygen will try to copy
892
+ # the style sheet file to the HTML output directory, so don't put your own
893
+ # style sheet in the HTML output directory as well, or it will be erased!
894
+
895
+ HTML_STYLESHEET =
896
+
897
+ # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
898
+ # other source files which should be copied to the HTML output directory. Note
899
+ # that these files will be copied to the base HTML output directory. Use the
900
+ # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
901
+ # files. In the HTML_STYLESHEET file, use the file name only. Also note that
902
+ # the files will be copied as-is; there are no commands or markers available.
903
+
904
+ HTML_EXTRA_FILES =
905
+
906
+ # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
907
+ # Doxygen will adjust the colors in the style sheet and background images
908
+ # according to this color. Hue is specified as an angle on a colorwheel,
909
+ # see http://en.wikipedia.org/wiki/Hue for more information.
910
+ # For instance the value 0 represents red, 60 is yellow, 120 is green,
911
+ # 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
912
+ # The allowed range is 0 to 359.
913
+
914
+ HTML_COLORSTYLE_HUE = 220
915
+
916
+ # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
917
+ # the colors in the HTML output. For a value of 0 the output will use
918
+ # grayscales only. A value of 255 will produce the most vivid colors.
919
+
920
+ HTML_COLORSTYLE_SAT = 100
921
+
922
+ # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
923
+ # the luminance component of the colors in the HTML output. Values below
924
+ # 100 gradually make the output lighter, whereas values above 100 make
925
+ # the output darker. The value divided by 100 is the actual gamma applied,
926
+ # so 80 represents a gamma of 0.8. The value 220 represents a gamma of 2.2,
927
+ # and 100 does not change the gamma.
928
+
929
+ HTML_COLORSTYLE_GAMMA = 80
930
+
931
+ # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
932
+ # page will contain the date and time when the page was generated. Setting
933
+ # this to NO can help when comparing the output of multiple runs.
934
+
935
+ HTML_TIMESTAMP = YES
936
+
937
+ # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
938
+ # files or namespaces will be aligned in HTML using tables. If set to
939
+ # NO a bullet list will be used.
940
+
941
+ HTML_ALIGN_MEMBERS = YES
942
+
943
+ # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
944
+ # documentation will contain sections that can be hidden and shown after the
945
+ # page has loaded. For this to work a browser that supports
946
+ # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
947
+ # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
948
+
949
+ HTML_DYNAMIC_SECTIONS = NO
950
+
951
+ # If the GENERATE_DOCSET tag is set to YES, additional index files
952
+ # will be generated that can be used as input for Apple's Xcode 3
953
+ # integrated development environment, introduced with OSX 10.5 (Leopard).
954
+ # To create a documentation set, doxygen will generate a Makefile in the
955
+ # HTML output directory. Running make will produce the docset in that
956
+ # directory and running "make install" will install the docset in
957
+ # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
958
+ # it at startup.
959
+ # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
960
+ # for more information.
961
+
962
+ GENERATE_DOCSET = NO
963
+
964
+ # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
965
+ # feed. A documentation feed provides an umbrella under which multiple
966
+ # documentation sets from a single provider (such as a company or product suite)
967
+ # can be grouped.
968
+
969
+ DOCSET_FEEDNAME = "Doxygen generated docs"
970
+
971
+ # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
972
+ # should uniquely identify the documentation set bundle. This should be a
973
+ # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
974
+ # will append .docset to the name.
975
+
976
+ DOCSET_BUNDLE_ID = org.doxygen.Project
977
+
978
+ # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
979
+ # the documentation publisher. This should be a reverse domain-name style
980
+ # string, e.g. com.mycompany.MyDocSet.documentation.
981
+
982
+ DOCSET_PUBLISHER_ID = org.doxygen.Publisher
983
+
984
+ # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
985
+
986
+ DOCSET_PUBLISHER_NAME = Publisher
987
+
988
+ # If the GENERATE_HTMLHELP tag is set to YES, additional index files
989
+ # will be generated that can be used as input for tools like the
990
+ # Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
991
+ # of the generated HTML documentation.
992
+
993
+ GENERATE_HTMLHELP = NO
994
+
995
+ # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
996
+ # be used to specify the file name of the resulting .chm file. You
997
+ # can add a path in front of the file if the result should not be
998
+ # written to the html output directory.
999
+
1000
+ CHM_FILE =
1001
+
1002
+ # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
1003
+ # be used to specify the location (absolute path including file name) of
1004
+ # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
1005
+ # the HTML help compiler on the generated index.hhp.
1006
+
1007
+ HHC_LOCATION =
1008
+
1009
+ # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
1010
+ # controls if a separate .chi index file is generated (YES) or that
1011
+ # it should be included in the master .chm file (NO).
1012
+
1013
+ GENERATE_CHI = NO
1014
+
1015
+ # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
1016
+ # is used to encode HtmlHelp index (hhk), content (hhc) and project file
1017
+ # content.
1018
+
1019
+ CHM_INDEX_ENCODING =
1020
+
1021
+ # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
1022
+ # controls whether a binary table of contents is generated (YES) or a
1023
+ # normal table of contents (NO) in the .chm file.
1024
+
1025
+ BINARY_TOC = NO
1026
+
1027
+ # The TOC_EXPAND flag can be set to YES to add extra items for group members
1028
+ # to the contents of the HTML help documentation and to the tree view.
1029
+
1030
+ TOC_EXPAND = NO
1031
+
1032
+ # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
1033
+ # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
1034
+ # that can be used as input for Qt's qhelpgenerator to generate a
1035
+ # Qt Compressed Help (.qch) of the generated HTML documentation.
1036
+
1037
+ GENERATE_QHP = NO
1038
+
1039
+ # If the QHG_LOCATION tag is specified, the QCH_FILE tag can
1040
+ # be used to specify the file name of the resulting .qch file.
1041
+ # The path specified is relative to the HTML output folder.
1042
+
1043
+ QCH_FILE =
1044
+
1045
+ # The QHP_NAMESPACE tag specifies the namespace to use when generating
1046
+ # Qt Help Project output. For more information please see
1047
+ # http://doc.trolltech.com/qthelpproject.html#namespace
1048
+
1049
+ QHP_NAMESPACE = org.doxygen.Project
1050
+
1051
+ # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
1052
+ # Qt Help Project output. For more information please see
1053
+ # http://doc.trolltech.com/qthelpproject.html#virtual-folders
1054
+
1055
+ QHP_VIRTUAL_FOLDER = doc
1056
+
1057
+ # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
1058
+ # add. For more information please see
1059
+ # http://doc.trolltech.com/qthelpproject.html#custom-filters
1060
+
1061
+ QHP_CUST_FILTER_NAME =
1062
+
1063
+ # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
1064
+ # custom filter to add. For more information please see
1065
+ # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
1066
+ # Qt Help Project / Custom Filters</a>.
1067
+
1068
+ QHP_CUST_FILTER_ATTRS =
1069
+
1070
+ # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
1071
+ # project's
1072
+ # filter section matches.
1073
+ # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
1074
+ # Qt Help Project / Filter Attributes</a>.
1075
+
1076
+ QHP_SECT_FILTER_ATTRS =
1077
+
1078
+ # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
1079
+ # be used to specify the location of Qt's qhelpgenerator.
1080
+ # If non-empty doxygen will try to run qhelpgenerator on the generated
1081
+ # .qhp file.
1082
+
1083
+ QHG_LOCATION =
1084
+
1085
+ # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
1086
+ # will be generated, which together with the HTML files, form an Eclipse help
1087
+ # plugin. To install this plugin and make it available under the help contents
1088
+ # menu in Eclipse, the contents of the directory containing the HTML and XML
1089
+ # files needs to be copied into the plugins directory of eclipse. The name of
1090
+ # the directory within the plugins directory should be the same as
1091
+ # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
1092
+ # the help appears.
1093
+
1094
+ GENERATE_ECLIPSEHELP = NO
1095
+
1096
+ # A unique identifier for the eclipse help plugin. When installing the plugin
1097
+ # the directory name containing the HTML and XML files should also have
1098
+ # this name.
1099
+
1100
+ ECLIPSE_DOC_ID = org.doxygen.Project
1101
+
1102
+ # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
1103
+ # at top of each HTML page. The value NO (the default) enables the index and
1104
+ # the value YES disables it. Since the tabs have the same information as the
1105
+ # navigation tree you can set this option to NO if you already set
1106
+ # GENERATE_TREEVIEW to YES.
1107
+
1108
+ DISABLE_INDEX = NO
1109
+
1110
+ # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
1111
+ # structure should be generated to display hierarchical information.
1112
+ # If the tag value is set to YES, a side panel will be generated
1113
+ # containing a tree-like index structure (just like the one that
1114
+ # is generated for HTML Help). For this to work a browser that supports
1115
+ # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
1116
+ # Windows users are probably better off using the HTML help feature.
1117
+ # Since the tree basically has the same information as the tab index you
1118
+ # could consider to set DISABLE_INDEX to NO when enabling this option.
1119
+
1120
+ GENERATE_TREEVIEW = NO
1121
+
1122
+ # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
1123
+ # (range [0,1..20]) that doxygen will group on one line in the generated HTML
1124
+ # documentation. Note that a value of 0 will completely suppress the enum
1125
+ # values from appearing in the overview section.
1126
+
1127
+ ENUM_VALUES_PER_LINE = 4
1128
+
1129
+ # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
1130
+ # and Class Hierarchy pages using a tree view instead of an ordered list.
1131
+
1132
+ USE_INLINE_TREES = NO
1133
+
1134
+ # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
1135
+ # used to set the initial width (in pixels) of the frame in which the tree
1136
+ # is shown.
1137
+
1138
+ TREEVIEW_WIDTH = 250
1139
+
1140
+ # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
1141
+ # links to external symbols imported via tag files in a separate window.
1142
+
1143
+ EXT_LINKS_IN_WINDOW = NO
1144
+
1145
+ # Use this tag to change the font size of Latex formulas included
1146
+ # as images in the HTML documentation. The default is 10. Note that
1147
+ # when you change the font size after a successful doxygen run you need
1148
+ # to manually remove any form_*.png images from the HTML output directory
1149
+ # to force them to be regenerated.
1150
+
1151
+ FORMULA_FONTSIZE = 10
1152
+
1153
+ # Use the FORMULA_TRANSPARENT tag to determine whether or not the images
1154
+ # generated for formulas are transparent PNGs. Transparent PNGs are
1155
+ # not supported properly for IE 6.0, but are supported on all modern browsers.
1156
+ # Note that when changing this option you need to delete any form_*.png files
1157
+ # in the HTML output before the changes have effect.
1158
+
1159
+ FORMULA_TRANSPARENT = YES
1160
+
1161
+ # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
1162
+ # (see http://www.mathjax.org) which uses client side Javascript for the
1163
+ # rendering instead of using prerendered bitmaps. Use this if you do not
1164
+ # have LaTeX installed or if you want the formulas to look prettier in the HTML
1165
+ # output. When enabled you also need to install MathJax separately and
1166
+ # configure the path to it using the MATHJAX_RELPATH option.
1167
+
1168
+ USE_MATHJAX = NO
1169
+
1170
+ # When MathJax is enabled you need to specify the location relative to the
1171
+ # HTML output directory using the MATHJAX_RELPATH option. The destination
1172
+ # directory should contain the MathJax.js script. For instance, if the mathjax
1173
+ # directory is located at the same level as the HTML output directory, then
1174
+ # MATHJAX_RELPATH should be ../mathjax. The default value points to the
1175
+ # mathjax.org site, so you can quickly see the result without installing
1176
+ # MathJax, but it is strongly recommended to install a local copy of MathJax
1177
+ # before deployment.
1178
+
1179
+ MATHJAX_RELPATH = http://www.mathjax.org/mathjax
1180
+
1181
+ # The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension
1182
+ # names that should be enabled during MathJax rendering.
1183
+
1184
+ MATHJAX_EXTENSIONS =
1185
+
1186
+ # When the SEARCHENGINE tag is enabled doxygen will generate a search box
1187
+ # for the HTML output. The underlying search engine uses javascript
1188
+ # and DHTML and should work on any modern browser. Note that when using
1189
+ # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
1190
+ # (GENERATE_DOCSET) there is already a search function so this one should
1191
+ # typically be disabled. For large projects the javascript based search engine
1192
+ # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
1193
+
1194
+ SEARCHENGINE = YES
1195
+
1196
+ # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
1197
+ # implemented using a PHP enabled web server instead of at the web client
1198
+ # using Javascript. Doxygen will generate the search PHP script and index
1199
+ # file to put on the web server. The advantage of the server
1200
+ # based approach is that it scales better to large projects and allows
1201
+ # full text search. The disadvantages are that it is more difficult to setup
1202
+ # and does not have live searching capabilities.
1203
+
1204
+ SERVER_BASED_SEARCH = NO
1205
+
1206
+ #---------------------------------------------------------------------------
1207
+ # configuration options related to the LaTeX output
1208
+ #---------------------------------------------------------------------------
1209
+
1210
+ # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
1211
+ # generate Latex output.
1212
+
1213
+ GENERATE_LATEX = NO
1214
+
1215
+ # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
1216
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1217
+ # put in front of it. If left blank `latex' will be used as the default path.
1218
+
1219
+ LATEX_OUTPUT = latex
1220
+
1221
+ # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1222
+ # invoked. If left blank `latex' will be used as the default command name.
1223
+ # Note that when enabling USE_PDFLATEX this option is only used for
1224
+ # generating bitmaps for formulas in the HTML output, but not in the
1225
+ # Makefile that is written to the output directory.
1226
+
1227
+ LATEX_CMD_NAME = latex
1228
+
1229
+ # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1230
+ # generate index for LaTeX. If left blank `makeindex' will be used as the
1231
+ # default command name.
1232
+
1233
+ MAKEINDEX_CMD_NAME = makeindex
1234
+
1235
+ # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1236
+ # LaTeX documents. This may be useful for small projects and may help to
1237
+ # save some trees in general.
1238
+
1239
+ COMPACT_LATEX = NO
1240
+
1241
+ # The PAPER_TYPE tag can be used to set the paper type that is used
1242
+ # by the printer. Possible values are: a4, letter, legal and
1243
+ # executive. If left blank a4wide will be used.
1244
+
1245
+ PAPER_TYPE = a4
1246
+
1247
+ # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1248
+ # packages that should be included in the LaTeX output.
1249
+
1250
+ EXTRA_PACKAGES =
1251
+
1252
+ # The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1253
+ # the generated latex document. The header should contain everything until
1254
+ # the first chapter. If it is left blank doxygen will generate a
1255
+ # standard header. Notice: only use this tag if you know what you are doing!
1256
+
1257
+ LATEX_HEADER =
1258
+
1259
+ # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
1260
+ # the generated latex document. The footer should contain everything after
1261
+ # the last chapter. If it is left blank doxygen will generate a
1262
+ # standard footer. Notice: only use this tag if you know what you are doing!
1263
+
1264
+ LATEX_FOOTER =
1265
+
1266
+ # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1267
+ # is prepared for conversion to pdf (using ps2pdf). The pdf file will
1268
+ # contain links (just like the HTML output) instead of page references
1269
+ # This makes the output suitable for online browsing using a pdf viewer.
1270
+
1271
+ PDF_HYPERLINKS = YES
1272
+
1273
+ # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1274
+ # plain latex in the generated Makefile. Set this option to YES to get a
1275
+ # higher quality PDF documentation.
1276
+
1277
+ USE_PDFLATEX = YES
1278
+
1279
+ # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
1280
+ # command to the generated LaTeX files. This will instruct LaTeX to keep
1281
+ # running if errors occur, instead of asking the user for help.
1282
+ # This option is also used when generating formulas in HTML.
1283
+
1284
+ LATEX_BATCHMODE = NO
1285
+
1286
+ # If LATEX_HIDE_INDICES is set to YES then doxygen will not
1287
+ # include the index chapters (such as File Index, Compound Index, etc.)
1288
+ # in the output.
1289
+
1290
+ LATEX_HIDE_INDICES = NO
1291
+
1292
+ # If LATEX_SOURCE_CODE is set to YES then doxygen will include
1293
+ # source code with syntax highlighting in the LaTeX output.
1294
+ # Note that which sources are shown also depends on other settings
1295
+ # such as SOURCE_BROWSER.
1296
+
1297
+ LATEX_SOURCE_CODE = NO
1298
+
1299
+ # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
1300
+ # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
1301
+ # http://en.wikipedia.org/wiki/BibTeX for more info.
1302
+
1303
+ LATEX_BIB_STYLE = plain
1304
+
1305
+ #---------------------------------------------------------------------------
1306
+ # configuration options related to the RTF output
1307
+ #---------------------------------------------------------------------------
1308
+
1309
+ # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
1310
+ # The RTF output is optimized for Word 97 and may not look very pretty with
1311
+ # other RTF readers or editors.
1312
+
1313
+ GENERATE_RTF = NO
1314
+
1315
+ # The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1316
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1317
+ # put in front of it. If left blank `rtf' will be used as the default path.
1318
+
1319
+ RTF_OUTPUT = rtf
1320
+
1321
+ # If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1322
+ # RTF documents. This may be useful for small projects and may help to
1323
+ # save some trees in general.
1324
+
1325
+ COMPACT_RTF = NO
1326
+
1327
+ # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1328
+ # will contain hyperlink fields. The RTF file will
1329
+ # contain links (just like the HTML output) instead of page references.
1330
+ # This makes the output suitable for online browsing using WORD or other
1331
+ # programs which support those fields.
1332
+ # Note: wordpad (write) and others do not support links.
1333
+
1334
+ RTF_HYPERLINKS = NO
1335
+
1336
+ # Load style sheet definitions from file. Syntax is similar to doxygen's
1337
+ # config file, i.e. a series of assignments. You only have to provide
1338
+ # replacements, missing definitions are set to their default value.
1339
+
1340
+ RTF_STYLESHEET_FILE =
1341
+
1342
+ # Set optional variables used in the generation of an rtf document.
1343
+ # Syntax is similar to doxygen's config file.
1344
+
1345
+ RTF_EXTENSIONS_FILE =
1346
+
1347
+ #---------------------------------------------------------------------------
1348
+ # configuration options related to the man page output
1349
+ #---------------------------------------------------------------------------
1350
+
1351
+ # If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1352
+ # generate man pages
1353
+
1354
+ GENERATE_MAN = NO
1355
+
1356
+ # The MAN_OUTPUT tag is used to specify where the man pages will be put.
1357
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1358
+ # put in front of it. If left blank `man' will be used as the default path.
1359
+
1360
+ MAN_OUTPUT = man
1361
+
1362
+ # The MAN_EXTENSION tag determines the extension that is added to
1363
+ # the generated man pages (default is the subroutine's section .3)
1364
+
1365
+ MAN_EXTENSION = .3
1366
+
1367
+ # If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1368
+ # then it will generate one additional man file for each entity
1369
+ # documented in the real man page(s). These additional files
1370
+ # only source the real man page, but without them the man command
1371
+ # would be unable to find the correct page. The default is NO.
1372
+
1373
+ MAN_LINKS = NO
1374
+
1375
+ #---------------------------------------------------------------------------
1376
+ # configuration options related to the XML output
1377
+ #---------------------------------------------------------------------------
1378
+
1379
+ # If the GENERATE_XML tag is set to YES Doxygen will
1380
+ # generate an XML file that captures the structure of
1381
+ # the code including all documentation.
1382
+
1383
+ GENERATE_XML = NO
1384
+
1385
+ # The XML_OUTPUT tag is used to specify where the XML pages will be put.
1386
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1387
+ # put in front of it. If left blank `xml' will be used as the default path.
1388
+
1389
+ XML_OUTPUT = xml
1390
+
1391
+ # The XML_SCHEMA tag can be used to specify an XML schema,
1392
+ # which can be used by a validating XML parser to check the
1393
+ # syntax of the XML files.
1394
+
1395
+ XML_SCHEMA =
1396
+
1397
+ # The XML_DTD tag can be used to specify an XML DTD,
1398
+ # which can be used by a validating XML parser to check the
1399
+ # syntax of the XML files.
1400
+
1401
+ XML_DTD =
1402
+
1403
+ # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1404
+ # dump the program listings (including syntax highlighting
1405
+ # and cross-referencing information) to the XML output. Note that
1406
+ # enabling this will significantly increase the size of the XML output.
1407
+
1408
+ XML_PROGRAMLISTING = YES
1409
+
1410
+ #---------------------------------------------------------------------------
1411
+ # configuration options for the AutoGen Definitions output
1412
+ #---------------------------------------------------------------------------
1413
+
1414
+ # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1415
+ # generate an AutoGen Definitions (see autogen.sf.net) file
1416
+ # that captures the structure of the code including all
1417
+ # documentation. Note that this feature is still experimental
1418
+ # and incomplete at the moment.
1419
+
1420
+ GENERATE_AUTOGEN_DEF = NO
1421
+
1422
+ #---------------------------------------------------------------------------
1423
+ # configuration options related to the Perl module output
1424
+ #---------------------------------------------------------------------------
1425
+
1426
+ # If the GENERATE_PERLMOD tag is set to YES Doxygen will
1427
+ # generate a Perl module file that captures the structure of
1428
+ # the code including all documentation. Note that this
1429
+ # feature is still experimental and incomplete at the
1430
+ # moment.
1431
+
1432
+ GENERATE_PERLMOD = NO
1433
+
1434
+ # If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1435
+ # the necessary Makefile rules, Perl scripts and LaTeX code to be able
1436
+ # to generate PDF and DVI output from the Perl module output.
1437
+
1438
+ PERLMOD_LATEX = NO
1439
+
1440
+ # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1441
+ # nicely formatted so it can be parsed by a human reader.
1442
+ # This is useful
1443
+ # if you want to understand what is going on.
1444
+ # On the other hand, if this
1445
+ # tag is set to NO the size of the Perl module output will be much smaller
1446
+ # and Perl will parse it just the same.
1447
+
1448
+ PERLMOD_PRETTY = YES
1449
+
1450
+ # The names of the make variables in the generated doxyrules.make file
1451
+ # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1452
+ # This is useful so different doxyrules.make files included by the same
1453
+ # Makefile don't overwrite each other's variables.
1454
+
1455
+ PERLMOD_MAKEVAR_PREFIX =
1456
+
1457
+ #---------------------------------------------------------------------------
1458
+ # Configuration options related to the preprocessor
1459
+ #---------------------------------------------------------------------------
1460
+
1461
+ # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1462
+ # evaluate all C-preprocessor directives found in the sources and include
1463
+ # files.
1464
+
1465
+ ENABLE_PREPROCESSING = YES
1466
+
1467
+ # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1468
+ # names in the source code. If set to NO (the default) only conditional
1469
+ # compilation will be performed. Macro expansion can be done in a controlled
1470
+ # way by setting EXPAND_ONLY_PREDEF to YES.
1471
+
1472
+ MACRO_EXPANSION = NO
1473
+
1474
+ # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1475
+ # then the macro expansion is limited to the macros specified with the
1476
+ # PREDEFINED and EXPAND_AS_DEFINED tags.
1477
+
1478
+ EXPAND_ONLY_PREDEF = NO
1479
+
1480
+ # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
1481
+ # pointed to by INCLUDE_PATH will be searched when a #include is found.
1482
+
1483
+ SEARCH_INCLUDES = YES
1484
+
1485
+ # The INCLUDE_PATH tag can be used to specify one or more directories that
1486
+ # contain include files that are not input files but should be processed by
1487
+ # the preprocessor.
1488
+
1489
+ INCLUDE_PATH =
1490
+
1491
+ # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1492
+ # patterns (like *.h and *.hpp) to filter out the header-files in the
1493
+ # directories. If left blank, the patterns specified with FILE_PATTERNS will
1494
+ # be used.
1495
+
1496
+ INCLUDE_FILE_PATTERNS =
1497
+
1498
+ # The PREDEFINED tag can be used to specify one or more macro names that
1499
+ # are defined before the preprocessor is started (similar to the -D option of
1500
+ # gcc). The argument of the tag is a list of macros of the form: name
1501
+ # or name=definition (no spaces). If the definition and the = are
1502
+ # omitted =1 is assumed. To prevent a macro definition from being
1503
+ # undefined via #undef or recursively expanded use the := operator
1504
+ # instead of the = operator.
1505
+
1506
+ PREDEFINED =
1507
+
1508
+ # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1509
+ # this tag can be used to specify a list of macro names that should be expanded.
1510
+ # The macro definition that is found in the sources will be used.
1511
+ # Use the PREDEFINED tag if you want to use a different macro definition that
1512
+ # overrules the definition found in the source code.
1513
+
1514
+ EXPAND_AS_DEFINED =
1515
+
1516
+ # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1517
+ # doxygen's preprocessor will remove all references to function-like macros
1518
+ # that are alone on a line, have an all uppercase name, and do not end with a
1519
+ # semicolon, because these will confuse the parser if not removed.
1520
+
1521
+ SKIP_FUNCTION_MACROS = YES
1522
+
1523
+ #---------------------------------------------------------------------------
1524
+ # Configuration::additions related to external references
1525
+ #---------------------------------------------------------------------------
1526
+
1527
+ # The TAGFILES option can be used to specify one or more tagfiles.
1528
+ # Optionally an initial location of the external documentation
1529
+ # can be added for each tagfile. The format of a tag file without
1530
+ # this location is as follows:
1531
+ #
1532
+ # TAGFILES = file1 file2 ...
1533
+ # Adding location for the tag files is done as follows:
1534
+ #
1535
+ # TAGFILES = file1=loc1 "file2 = loc2" ...
1536
+ # where "loc1" and "loc2" can be relative or absolute paths or
1537
+ # URLs. If a location is present for each tag, the installdox tool
1538
+ # does not have to be run to correct the links.
1539
+ # Note that each tag file must have a unique name
1540
+ # (where the name does NOT include the path)
1541
+ # If a tag file is not located in the directory in which doxygen
1542
+ # is run, you must also specify the path to the tagfile here.
1543
+
1544
+ TAGFILES =
1545
+
1546
+ # When a file name is specified after GENERATE_TAGFILE, doxygen will create
1547
+ # a tag file that is based on the input files it reads.
1548
+
1549
+ GENERATE_TAGFILE =
1550
+
1551
+ # If the ALLEXTERNALS tag is set to YES all external classes will be listed
1552
+ # in the class index. If set to NO only the inherited external classes
1553
+ # will be listed.
1554
+
1555
+ ALLEXTERNALS = NO
1556
+
1557
+ # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1558
+ # in the modules index. If set to NO, only the current project's groups will
1559
+ # be listed.
1560
+
1561
+ EXTERNAL_GROUPS = YES
1562
+
1563
+ # The PERL_PATH should be the absolute path and name of the perl script
1564
+ # interpreter (i.e. the result of `which perl').
1565
+
1566
+ PERL_PATH = /usr/bin/perl
1567
+
1568
+ #---------------------------------------------------------------------------
1569
+ # Configuration options related to the dot tool
1570
+ #---------------------------------------------------------------------------
1571
+
1572
+ # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1573
+ # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1574
+ # or super classes. Setting the tag to NO turns the diagrams off. Note that
1575
+ # this option also works with HAVE_DOT disabled, but it is recommended to
1576
+ # install and use dot, since it yields more powerful graphs.
1577
+
1578
+ CLASS_DIAGRAMS = YES
1579
+
1580
+ # You can define message sequence charts within doxygen comments using the \msc
1581
+ # command. Doxygen will then run the mscgen tool (see
1582
+ # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1583
+ # documentation. The MSCGEN_PATH tag allows you to specify the directory where
1584
+ # the mscgen tool resides. If left empty the tool is assumed to be found in the
1585
+ # default search path.
1586
+
1587
+ MSCGEN_PATH =
1588
+
1589
+ # If set to YES, the inheritance and collaboration graphs will hide
1590
+ # inheritance and usage relations if the target is undocumented
1591
+ # or is not a class.
1592
+
1593
+ HIDE_UNDOC_RELATIONS = YES
1594
+
1595
+ # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1596
+ # available from the path. This tool is part of Graphviz, a graph visualization
1597
+ # toolkit from AT&T and Lucent Bell Labs. The other options in this section
1598
+ # have no effect if this option is set to NO (the default)
1599
+
1600
+ HAVE_DOT = YES
1601
+
1602
+ # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
1603
+ # allowed to run in parallel. When set to 0 (the default) doxygen will
1604
+ # base this on the number of processors available in the system. You can set it
1605
+ # explicitly to a value larger than 0 to get control over the balance
1606
+ # between CPU load and processing speed.
1607
+
1608
+ DOT_NUM_THREADS = 0
1609
+
1610
+ # By default doxygen will use the Helvetica font for all dot files that
1611
+ # doxygen generates. When you want a differently looking font you can specify
1612
+ # the font name using DOT_FONTNAME. You need to make sure dot is able to find
1613
+ # the font, which can be done by putting it in a standard location or by setting
1614
+ # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
1615
+ # directory containing the font.
1616
+
1617
+ DOT_FONTNAME = Helvetica
1618
+
1619
+ # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1620
+ # The default size is 10pt.
1621
+
1622
+ DOT_FONTSIZE = 10
1623
+
1624
+ # By default doxygen will tell dot to use the Helvetica font.
1625
+ # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
1626
+ # set the path where dot can find it.
1627
+
1628
+ DOT_FONTPATH =
1629
+
1630
+ # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1631
+ # will generate a graph for each documented class showing the direct and
1632
+ # indirect inheritance relations. Setting this tag to YES will force the
1633
+ # CLASS_DIAGRAMS tag to NO.
1634
+
1635
+ CLASS_GRAPH = YES
1636
+
1637
+ # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1638
+ # will generate a graph for each documented class showing the direct and
1639
+ # indirect implementation dependencies (inheritance, containment, and
1640
+ # class references variables) of the class with other documented classes.
1641
+
1642
+ COLLABORATION_GRAPH = YES
1643
+
1644
+ # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1645
+ # will generate a graph for groups, showing the direct groups dependencies
1646
+
1647
+ GROUP_GRAPHS = YES
1648
+
1649
+ # If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1650
+ # collaboration diagrams in a style similar to the OMG's Unified Modeling
1651
+ # Language.
1652
+
1653
+ UML_LOOK = NO
1654
+
1655
+ # If set to YES, the inheritance and collaboration graphs will show the
1656
+ # relations between templates and their instances.
1657
+
1658
+ TEMPLATE_RELATIONS = YES
1659
+
1660
+ # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1661
+ # tags are set to YES then doxygen will generate a graph for each documented
1662
+ # file showing the direct and indirect include dependencies of the file with
1663
+ # other documented files.
1664
+
1665
+ INCLUDE_GRAPH = YES
1666
+
1667
+ # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1668
+ # HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1669
+ # documented header file showing the documented files that directly or
1670
+ # indirectly include this file.
1671
+
1672
+ INCLUDED_BY_GRAPH = YES
1673
+
1674
+ # If the CALL_GRAPH and HAVE_DOT options are set to YES then
1675
+ # doxygen will generate a call dependency graph for every global function
1676
+ # or class method. Note that enabling this option will significantly increase
1677
+ # the time of a run. So in most cases it will be better to enable call graphs
1678
+ # for selected functions only using the \callgraph command.
1679
+
1680
+ CALL_GRAPH = NO
1681
+
1682
+ # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1683
+ # doxygen will generate a caller dependency graph for every global function
1684
+ # or class method. Note that enabling this option will significantly increase
1685
+ # the time of a run. So in most cases it will be better to enable caller
1686
+ # graphs for selected functions only using the \callergraph command.
1687
+
1688
+ CALLER_GRAPH = NO
1689
+
1690
+ # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1691
+ # will generate a graphical hierarchy of all classes instead of a textual one.
1692
+
1693
+ GRAPHICAL_HIERARCHY = YES
1694
+
1695
+ # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1696
+ # then doxygen will show the dependencies a directory has on other directories
1697
+ # in a graphical way. The dependency relations are determined by the #include
1698
+ # relations between the files in the directories.
1699
+
1700
+ DIRECTORY_GRAPH = YES
1701
+
1702
+ # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1703
+ # generated by dot. Possible values are svg, png, jpg, or gif.
1704
+ # If left blank png will be used. If you choose svg you need to set
1705
+ # HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1706
+ # visible in IE 9+ (other browsers do not have this requirement).
1707
+
1708
+ DOT_IMAGE_FORMAT = png
1709
+
1710
+ # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
1711
+ # enable generation of interactive SVG images that allow zooming and panning.
1712
+ # Note that this requires a modern browser other than Internet Explorer.
1713
+ # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
1714
+ # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1715
+ # visible. Older versions of IE do not have SVG support.
1716
+
1717
+ INTERACTIVE_SVG = NO
1718
+
1719
+ # The tag DOT_PATH can be used to specify the path where the dot tool can be
1720
+ # found. If left blank, it is assumed the dot tool can be found in the path.
1721
+
1722
+ DOT_PATH =
1723
+
1724
+ # The DOTFILE_DIRS tag can be used to specify one or more directories that
1725
+ # contain dot files that are included in the documentation (see the
1726
+ # \dotfile command).
1727
+
1728
+ DOTFILE_DIRS =
1729
+
1730
+ # The MSCFILE_DIRS tag can be used to specify one or more directories that
1731
+ # contain msc files that are included in the documentation (see the
1732
+ # \mscfile command).
1733
+
1734
+ MSCFILE_DIRS =
1735
+
1736
+ # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1737
+ # nodes that will be shown in the graph. If the number of nodes in a graph
1738
+ # becomes larger than this value, doxygen will truncate the graph, which is
1739
+ # visualized by representing a node as a red box. Note that if the
1740
+ # number of direct children of the root node in a graph is already larger than
1741
+ # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1742
+ # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1743
+
1744
+ DOT_GRAPH_MAX_NODES = 50
1745
+
1746
+ # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1747
+ # graphs generated by dot. A depth value of 3 means that only nodes reachable
1748
+ # from the root by following a path via at most 3 edges will be shown. Nodes
1749
+ # that lay further from the root node will be omitted. Note that setting this
1750
+ # option to 1 or 2 may greatly reduce the computation time needed for large
1751
+ # code bases. Also note that the size of a graph can be further restricted by
1752
+ # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1753
+
1754
+ MAX_DOT_GRAPH_DEPTH = 0
1755
+
1756
+ # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1757
+ # background. This is disabled by default, because dot on Windows does not
1758
+ # seem to support this out of the box. Warning: Depending on the platform used,
1759
+ # enabling this option may lead to badly anti-aliased labels on the edges of
1760
+ # a graph (i.e. they become hard to read).
1761
+
1762
+ DOT_TRANSPARENT = NO
1763
+
1764
+ # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
1765
+ # files in one run (i.e. multiple -o and -T options on the command line). This
1766
+ # makes dot run faster, but since only newer versions of dot (>1.8.10)
1767
+ # support this, this feature is disabled by default.
1768
+
1769
+ DOT_MULTI_TARGETS = YES
1770
+
1771
+ # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1772
+ # generate a legend page explaining the meaning of the various boxes and
1773
+ # arrows in the dot generated graphs.
1774
+
1775
+ GENERATE_LEGEND = YES
1776
+
1777
+ # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1778
+ # remove the intermediate dot files that are used to generate
1779
+ # the various graphs.
1780
+
1781
+ DOT_CLEANUP = YES
mosesdecoder/moses-cmd/Jamfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
2
+
3
+ exe moses : Main.cpp deps ;
4
+ exe vwtrainer : MainVW.cpp deps ;
5
+ exe lmbrgrid : LatticeMBRGrid.cpp deps ;
6
+ alias programs : moses lmbrgrid vwtrainer ;
7
+
mosesdecoder/moses-cmd/LatticeMBRGrid.cpp ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (c) 2010 University of Edinburgh
6
+ All rights reserved.
7
+
8
+ Redistribution and use in source and binary forms, with or without modification,
9
+ are permitted provided that the following conditions are met:
10
+
11
+ * Redistributions of source code must retain the above copyright notice,
12
+ this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above copyright notice,
14
+ this list of conditions and the following disclaimer in the documentation
15
+ and/or other materials provided with the distribution.
16
+ * Neither the name of the University of Edinburgh nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
24
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ POSSIBILITY OF SUCH DAMAGE.
31
+ ***********************************************************************/
32
+ /**
33
+ * Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
34
+ See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation by Tromble, Kumar, Och and Macherey,
35
+ EMNLP 2008 for details of the parameters.
36
+
37
+ The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
38
+ -lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
39
+ parameters are missing, then they are set to their default values. Output is of the form:
40
+ sentence-id ||| p r prune scale ||| translation-hypothesis
41
+ **/
42
+
43
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>

#include <boost/foreach.hpp>

#include "moses/IOWrapper.h"
#include "moses/LatticeMBR.h"
#include "moses/Manager.h"
#include "moses/StaticData.h"
#include "moses/Timer.h"
#include "moses/TranslationTask.h"
#include "util/exception.hh"
58
+
59
+ using namespace std;
60
+ using namespace Moses;
61
+
62
+ //keys
63
+ enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
64
+
65
+ namespace Moses
66
+ {
67
+
68
+ class Grid
69
+ {
70
+ public:
71
+ /** Add a parameter with key, command line argument, and default value */
72
+ void addParam(gridkey key, const string& arg, float defaultValue) {
73
+ m_args[arg] = key;
74
+ UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(),
75
+ "Couldn't find value for key " << (int) key);
76
+ m_grid[key].push_back(defaultValue);
77
+ }
78
+
79
+ /** Parse the arguments, removing those that define the grid and returning a copy of the rest */
80
+ void parseArgs(int& argc, char const**& argv) {
81
+ char const** newargv = new char const*[argc+1]; //Space to add mbr parameter
82
+ int newargc = 0;
83
+ for (int i = 0; i < argc; ++i) {
84
+ bool consumed = false;
85
+ for (map<string,gridkey>::const_iterator argi = m_args.begin(); argi != m_args.end(); ++argi) {
86
+ if (!strcmp(argv[i], argi->first.c_str())) {
87
+ ++i;
88
+ if (i >= argc) {
89
+ cerr << "Error: missing parameter for " << argi->first << endl;
90
+ throw runtime_error("Missing parameter");
91
+ } else {
92
+ string value = argv[i];
93
+ gridkey key = argi->second;
94
+ if (m_grid[key].size() != 1) {
95
+ throw runtime_error("Duplicate grid argument");
96
+ }
97
+ m_grid[key].clear();
98
+ char delim = ',';
99
+ string::size_type lastpos = value.find_first_not_of(delim);
100
+ string::size_type pos = value.find_first_of(delim,lastpos);
101
+ while (string::npos != pos || string::npos != lastpos) {
102
+ float param = atof(value.substr(lastpos, pos-lastpos).c_str());
103
+ if (!param) {
104
+ cerr << "Error: Illegal grid parameter for " << argi->first << endl;
105
+ throw runtime_error("Illegal grid parameter");
106
+ }
107
+ m_grid[key].push_back(param);
108
+ lastpos = value.find_first_not_of(delim,pos);
109
+ pos = value.find_first_of(delim,lastpos);
110
+ }
111
+ consumed = true;
112
+ }
113
+ if (consumed) break;
114
+ }
115
+ }
116
+ if (!consumed) {
117
+ // newargv[newargc] = new char[strlen(argv[i]) + 1];
118
+ // strcpy(newargv[newargc],argv[i]);
119
+ newargv[newargc] = argv[i];
120
+ ++newargc;
121
+ }
122
+ }
123
+ argc = newargc;
124
+ argv = newargv;
125
+ }
126
+
127
+ /** Get the grid for a particular key.*/
128
+ const vector<float>& getGrid(gridkey key) const {
129
+ map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
130
+ assert (iter != m_grid.end());
131
+ return iter->second;
132
+
133
+ }
134
+
135
+ private:
136
+ map<gridkey,vector<float> > m_grid;
137
+ map<string,gridkey> m_args;
138
+ };
139
+
140
+ } // namespace
141
+
142
+ int main(int argc, char const* argv[])
143
+ {
144
+ cerr << "Lattice MBR Grid search" << endl;
145
+
146
+ Grid grid;
147
+ grid.addParam(lmbr_p, "-lmbr-p", 0.5);
148
+ grid.addParam(lmbr_r, "-lmbr-r", 0.5);
149
+ grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
150
+ grid.addParam(lmbr_scale, "-mbr-scale",1.0);
151
+
152
+ grid.parseArgs(argc,argv);
153
+
154
+ Parameter* params = new Parameter();
155
+ if (!params->LoadParam(argc,argv)) {
156
+ params->Explain();
157
+ exit(1);
158
+ }
159
+
160
+ ResetUserTime();
161
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
162
+ exit(1);
163
+ }
164
+
165
+ StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
166
+ boost::shared_ptr<AllOptions> opts(new AllOptions(*SD.options()));
167
+ LMBR_Options& lmbr = opts->lmbr;
168
+ MBR_Options& mbr = opts->mbr;
169
+ lmbr.enabled = true;
170
+
171
+ boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
172
+ if (!ioWrapper) {
173
+ throw runtime_error("Failed to initialise IOWrapper");
174
+ }
175
+ size_t nBestSize = mbr.size;
176
+
177
+ if (nBestSize <= 0) {
178
+ throw new runtime_error("Non-positive size specified for n-best list");
179
+ }
180
+
181
+ const vector<float>& pgrid = grid.getGrid(lmbr_p);
182
+ const vector<float>& rgrid = grid.getGrid(lmbr_r);
183
+ const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
184
+ const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
185
+
186
+ boost::shared_ptr<InputType> source;
187
+ while((source = ioWrapper->ReadInput()) != NULL) {
188
+ // set up task of translating one sentence
189
+ boost::shared_ptr<TranslationTask> ttask;
190
+ ttask = TranslationTask::create(source, ioWrapper);
191
+ Manager manager(ttask);
192
+ manager.Decode();
193
+ TrellisPathList nBestList;
194
+ manager.CalcNBest(nBestSize, nBestList,true);
195
+ //grid search
196
+ BOOST_FOREACH(float const& p, pgrid) {
197
+ lmbr.precision = p;
198
+ BOOST_FOREACH(float const& r, rgrid) {
199
+ lmbr.ratio = r;
200
+ BOOST_FOREACH(size_t const prune_i, prune_grid) {
201
+ lmbr.pruning_factor = prune_i;
202
+ BOOST_FOREACH(float const& scale_i, scale_grid) {
203
+ mbr.scale = scale_i;
204
+ size_t lineCount = source->GetTranslationId();
205
+ cout << lineCount << " ||| " << p << " "
206
+ << r << " " << size_t(prune_i) << " " << scale_i
207
+ << " ||| ";
208
+ vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
209
+ manager.OutputBestHypo(mbrBestHypo, cout);
210
+ }
211
+ }
212
+ }
213
+ }
214
+ }
215
+ }
mosesdecoder/moses-cmd/Main.cpp ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2009 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ /**
23
+ * Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main.
24
+ **/
25
+ #include "moses/ExportInterface.h"
26
+ #include "util/string_stream.hh"
27
+
28
/** main function of the command line version of the decoder **/
int main(int argc, char const** argv)
{
  // Thin wrapper: all real work (option parsing, model loading, decoding
  // loop, threading) happens in decoder_main, declared in
  // moses/ExportInterface.h, so that library embedders share one code path.
  return decoder_main(argc, argv);
}
33
+
mosesdecoder/moses-cmd/MainVW.cpp ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2009 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ /**
23
+ * Moses main, for single-threaded and multi-threaded.
24
+ **/
25
+ #include <exception>
26
+ #include <fstream>
27
+ #include <sstream>
28
+ #include <vector>
29
+
30
+ #include "util/usage.hh"
31
+
32
+ #ifdef WIN32
33
+ // Include Visual Leak Detector
34
+ //#include <vld.h>
35
+ #endif
36
+
37
+ #include "moses/IOWrapper.h"
38
+ #include "moses/Hypothesis.h"
39
+ #include "moses/Manager.h"
40
+ #include "moses/StaticData.h"
41
+ #include "moses/TypeDef.h"
42
+ #include "moses/Util.h"
43
+ #include "moses/Timer.h"
44
+ #include "moses/TranslationModel/PhraseDictionary.h"
45
+ #include "moses/FF/StatefulFeatureFunction.h"
46
+ #include "moses/FF/StatelessFeatureFunction.h"
47
+ #include "moses/TrainingTask.h"
48
+ #include "util/random.hh"
49
+
50
+ #ifdef HAVE_PROTOBUF
51
+ #include "hypergraph.pb.h"
52
+ #endif
53
+
54
+ using namespace std;
55
+ using namespace Moses;
56
+
57
+ namespace Moses
58
+ {
59
+
60
// Write the current global feature weights to the given stream in
// fixed-point notation, as used by the search-graph/hypergraph output.
void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
{
  // Fixed notation with 6 decimals keeps the weight output stable and
  // locale-independent across runs.
  outputSearchGraphStream.setf(std::ios::fixed);
  outputSearchGraphStream.precision(6);
  StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
}
66
+
67
+
68
+ } //namespace
69
+
70
+ /** main function of the command line version of the decoder **/
71
/** main function of the VW-trainer build of the decoder: instead of
 *  decoding, each input sentence is wrapped in a TrainingTask (built as
 *  the `vwtrainer` executable per the Jamfile — confirm against
 *  TrainingTask for what the task actually computes). **/
int main(int argc, char const** argv)
{
  //setting in the Staticdata a link between the thread id of this process and a NULL tasksptr
  // StaticData::InstanceNonConst().SetTask(); // => moved into StaticData constructor

  try {

#ifdef HAVE_PROTOBUF
    GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif

    // echo command line, if verbose
    IFVERBOSE(1) {
      TRACE_ERR("command: ");
      for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
      TRACE_ERR(endl);
    }

    // set number of significant decimals in output
    FixPrecision(cout);
    FixPrecision(cerr);

    // load all the settings into the Parameter class
    // (stores them as strings, or array of strings)
    Parameter params;
    if (!params.LoadParam(argc,argv)) {
      exit(1);
    }


    // initialize all "global" variables, which are stored in StaticData
    // note: this also loads models such as the language model, etc.
    ResetUserTime();
    if (!StaticData::LoadDataStatic(&params, argv[0])) {
      exit(1);
    }

    // setting "-show-weights" -> just dump out weights and exit
    if (params.isParamSpecified("show-weights")) {
      ShowWeights();
      exit(0);
    }

    // shorthand for accessing information in StaticData
    const StaticData& staticData = StaticData::Instance();


    //initialise random numbers
    util::rand_init();

    // set up read/writing class
    IFVERBOSE(1) {
      PrintUserTime("Created input-output object");
    }
    AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options()));
    boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
    if (ioWrapper == NULL) {
      cerr << "Error; Failed to create IO object" << endl;
      exit(1);
    }

    // check on weights
    const ScoreComponentCollection& weights = staticData.GetAllWeights();
    IFVERBOSE(2) {
      TRACE_ERR("The global weight vector looks like this: ");
      TRACE_ERR(weights);
      TRACE_ERR("\n");
    }

#ifdef WITH_THREADS
#pragma message ("Compiling with Threads.")
    ThreadPool pool(staticData.ThreadCount());
#endif

    // main loop over set of input sentences

    // The ContextScope is shared across all tasks of this run.
    boost::shared_ptr<ContextScope> scope(new ContextScope);
    boost::shared_ptr<InputType> source;
    while ((source = ioWrapper->ReadInput()) != NULL) {
      IFVERBOSE(1) {
        ResetUserTime();
      }

      // set up task of training one sentence
      boost::shared_ptr<TrainingTask> task;
      task = TrainingTask::create(source, ioWrapper, scope);

      // execute task
#ifdef WITH_THREADS
      pool.Submit(task);
#else
      task->Run();
#endif
    }

    // we are done, finishing up
#ifdef WITH_THREADS
    pool.Stop(true); //flush remaining jobs
#endif

    FeatureFunction::Destroy();

  } catch (const std::exception &e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  IFVERBOSE(1) util::PrintUsage(std::cerr);

#ifndef EXIT_RETURN
  //This avoids that destructors are called (it can take a long time)
  exit(EXIT_SUCCESS);
#else
  return EXIT_SUCCESS;
#endif
}
mosesdecoder/moses-cmd/MainVW.h ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // $Id$
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (c) 2006 University of Edinburgh
7
+ All rights reserved.
8
+
9
+ Redistribution and use in source and binary forms, with or without modification,
10
+ are permitted provided that the following conditions are met:
11
+
12
+ * Redistributions of source code must retain the above copyright notice,
13
+ this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above copyright notice,
15
+ this list of conditions and the following disclaimer in the documentation
16
+ and/or other materials provided with the distribution.
17
+ * Neither the name of the University of Edinburgh nor the names of its contributors
18
+ may be used to endorse or promote products derived from this software
19
+ without specific prior written permission.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
25
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
+ POSSIBILITY OF SUCH DAMAGE.
32
+ ***********************************************************************/
33
+
34
+ // example file on how to use moses library
35
+
36
+
37
+ #include "moses/StaticData.h"
38
+
39
+ class IOWrapper;
40
+
41
+ int main(int argc, char* argv[]);
42
+
mosesdecoder/moses2/AlignmentInfo.h ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <iostream>
23
+ #include <ostream>
24
+ #include <set>
25
+ #include <vector>
26
+ #include <cstdlib>
27
+
28
+ #include <boost/functional/hash.hpp>
29
+ #include "TypeDef.h"
30
+
31
+ namespace Moses2
32
+ {
33
+
34
+ class AlignmentInfoCollection;
35
+ class System;
36
+
37
+ /** Collection of non-terminal alignment pairs, ordered by source index.
38
+ * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
39
+ */
40
class AlignmentInfo
{
  friend struct AlignmentInfoOrderer;
  friend struct AlignmentInfoHasher;
  friend class AlignmentInfoCollection;
  friend class VW;

  friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);

public:
  // Set of (source position, target position) pairs, ordered by source index.
  typedef std::set<std::pair<size_t,size_t> > CollType;
  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;

  const_iterator begin() const {
    return m_collection.begin();
  }
  const_iterator end() const {
    return m_collection.end();
  }

  // Insert one (sourcePos, targetPos) alignment point; duplicates are
  // absorbed by the underlying std::set.
  void Add(size_t sourcePos, size_t targetPos) {
    m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
  }
  /** Provides a map from target-side to source-side non-terminal indices.
   * The target-side index should be the rule symbol index (COUNTING terminals).
   * The index returned is the rule non-terminal index (IGNORING terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap() const {
    return m_nonTermIndexMap;
  }

  /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
   * the index counting both terminals and non-terminals) */
  const NonTermIndexMap &GetNonTermIndexMap2() const {
    return m_nonTermIndexMap2;
  }

  // Direct read access to the full set of alignment points.
  const CollType &GetAlignments() const {
    return m_collection;
  }

  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;

  // Number of alignment points.
  size_t GetSize() const {
    return m_collection.size();
  }

  std::vector< const std::pair<size_t,size_t>* >
  GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;

  std::vector<size_t> GetSourceIndex2PosMap() const;

  // NOTE(review): equality compares the alignment set and the first
  // non-term index map only; m_nonTermIndexMap2 is not compared —
  // presumably it is derived from the same data. Confirm before relying
  // on it for objects built through different constructors.
  bool operator==(const AlignmentInfo& rhs) const {
    return m_collection == rhs.m_collection &&
           m_nonTermIndexMap == rhs.m_nonTermIndexMap;
  }

  std::string Debug(const System &system) const;

private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
  explicit AlignmentInfo(const std::vector<unsigned char> &aln);

  // used only by VW to load word alignment between sentences
  explicit AlignmentInfo(const std::string &str);

  // Populates m_nonTermIndexMap and m_nonTermIndexMap2 from m_collection.
  void BuildNonTermIndexMaps();

  CollType m_collection;            // the alignment points themselves
  NonTermIndexMap m_nonTermIndexMap;   // target index -> non-term index
  NonTermIndexMap m_nonTermIndexMap2;  // target index -> symbol index
};
115
+
116
+ /** Define an arbitrary strict weak ordering between AlignmentInfo objects
117
+ * for use by AlignmentInfoCollection.
118
+ */
119
+ struct AlignmentInfoOrderer {
120
+ bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
121
+ if (a.m_collection == b.m_collection) {
122
+ return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
123
+ } else {
124
+ return a.m_collection < b.m_collection;
125
+ }
126
+ }
127
+ };
128
+
129
+ /**
130
+ * Hashing functoid
131
+ **/
132
/**
 * Hashing functoid: combines the alignment set and the non-terminal index
 * map — the same two fields AlignmentInfo::operator== compares.
 **/
struct AlignmentInfoHasher {
  size_t operator()(const AlignmentInfo& a) const {
    size_t h = 0;
    boost::hash_combine(h, a.m_collection);
    boost::hash_combine(h, a.m_nonTermIndexMap);
    return h;
  }

};
141
+
142
// ADL hook so boost::hash picks up AlignmentInfo automatically.
// The hasher is stateless, so a temporary is equivalent to the previous
// function-local static instance.
inline size_t hash_value(const AlignmentInfo& a)
{
  return AlignmentInfoHasher()(a);
}
147
+
148
+ }
mosesdecoder/moses2/AlignmentInfoCollection.cpp ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "AlignmentInfoCollection.h"
21
+
22
+ using namespace std;
23
+
24
+ namespace Moses2
25
+ {
26
+
27
// Process-wide singleton instance, constructed at static-initialisation time.
AlignmentInfoCollection AlignmentInfoCollection::s_instance;

AlignmentInfoCollection::AlignmentInfoCollection()
{
  // Pre-insert the empty alignment so GetEmptyAlignmentInfo() can hand out
  // one canonical shared instance. (The Add(set) overload used here is
  // declared in the header alongside the Add(AlignmentInfo) overload.)
  std::set<std::pair<size_t,size_t> > pairs;
  m_emptyAlignmentInfo = Add(pairs);
}
34
+
35
// Nothing to release explicitly: the collection stores AlignmentInfo
// objects by value in its set.
AlignmentInfoCollection::~AlignmentInfoCollection()
{}
37
+
38
// Return the canonical empty alignment pre-created by the constructor;
// safe to call without locking because the pointer never changes after
// construction.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  return *m_emptyAlignmentInfo;
}
42
+
43
/** Intern an AlignmentInfo: return a pointer to the unique stored copy,
 *  inserting it first if it is new. Pointers remain valid for the lifetime
 *  of the collection (std::set never relocates its elements). */
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
  {
    // Fast path: most alignments already exist, so probe under a shared
    // (read) lock first.
    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
    AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
    if (i != m_collection.end())
      return &*i;
  }
  // NOTE(review): the read lock is dropped before the exclusive lock is
  // taken, so two threads can reach the insert below with the same value;
  // that race is benign — the second insert is a no-op and both callers
  // get the same element back.
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
  std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
  return &(*ret.first);
}
59
+
60
+
61
+
62
+ }
mosesdecoder/moses2/ArcLists.cpp ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ArcList.cpp
3
+ *
4
+ * Created on: 26 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <iostream>
8
+ #include <sstream>
9
+ #include <algorithm>
10
+ #include <boost/foreach.hpp>
11
+ #include "ArcLists.h"
12
+ #include "HypothesisBase.h"
13
+ #include "util/exception.hh"
14
+
15
+ using namespace std;
16
+
17
+ namespace Moses2
18
+ {
19
+
20
ArcLists::ArcLists()
{
  // Nothing to initialise: the hypothesis -> arc-list map starts empty.
}
25
+
26
+ ArcLists::~ArcLists()
27
+ {
28
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
29
+ const ArcList *arcList = collPair.second;
30
+ delete arcList;
31
+ }
32
+ }
33
+
34
/** Record the outcome of a recombination attempt.
 *  added == true : currHypo was inserted into the stack (it won). If it
 *      displaced an existing hypo (otherHypo != NULL), the loser's arc list
 *      is detached and transferred to the winner; otherwise a fresh list is
 *      created. currHypo is then mapped to that list.
 *  added == false: currHypo lost against otherHypo and is appended to the
 *      winner's existing list.
 *  In both cases currHypo itself ends up on the list that is attached to
 *  the surviving stack entry.
 */
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
                      const HypothesisBase *otherHypo)
{
  //cerr << added << " " << currHypo << " " << otherHypo << endl;
  ArcList *arcList;
  if (added) {
    // we're winners!
    if (otherHypo) {
      // there was a existing losing hypo
      arcList = &GetAndDetachArcList(otherHypo);
    } else {
      // there was no existing hypo
      arcList = new ArcList;
    }
    m_coll[currHypo] = arcList;
  } else {
    // we're losers!
    // there should be a winner, we're not doing beam pruning
    UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
    arcList = &GetArcList(otherHypo);
  }

  // in any case, add the curr hypo
  arcList->push_back(currHypo);
}
59
+
60
+ ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
61
+ {
62
+ Coll::iterator iter = m_coll.find(hypo);
63
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
64
+ ArcList &arcList = *iter->second;
65
+ return arcList;
66
+ }
67
+
68
+ const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
69
+ {
70
+ Coll::const_iterator iter = m_coll.find(hypo);
71
+
72
+ if (iter == m_coll.end()) {
73
+ cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
74
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
75
+ const HypothesisBase *hypo = collPair.first;
76
+ cerr << hypo << " ";
77
+ }
78
+ }
79
+
80
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
81
+ ArcList &arcList = *iter->second;
82
+ return arcList;
83
+ }
84
+
85
+ ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
86
+ {
87
+ Coll::iterator iter = m_coll.find(hypo);
88
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
89
+ ArcList &arcList = *iter->second;
90
+
91
+ m_coll.erase(iter);
92
+
93
+ return arcList;
94
+ }
95
+
96
+ void ArcLists::Sort()
97
+ {
98
+ BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
99
+ ArcList &list = *collPair.second;
100
+ std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
101
+ }
102
+ }
103
+
104
+ void ArcLists::Delete(const HypothesisBase *hypo)
105
+ {
106
+ //cerr << "hypo=" << hypo->Debug() << endl;
107
+ //cerr << "m_coll=" << m_coll.size() << endl;
108
+ Coll::iterator iter = m_coll.find(hypo);
109
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
110
+ ArcList *arcList = iter->second;
111
+
112
+ m_coll.erase(iter);
113
+ delete arcList;
114
+ }
115
+
116
+ std::string ArcLists::Debug(const System &system) const
117
+ {
118
+ stringstream strm;
119
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
120
+ const ArcList *arcList = collPair.second;
121
+ strm << arcList << "(" << arcList->size() << ") ";
122
+ }
123
+ return strm.str();
124
+ }
125
+
126
+ }
127
+
mosesdecoder/moses2/Array.h ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
#include <cassert>
#include <cstring>
#include <boost/functional/hash.hpp>
#include "MemPool.h"
5
+
6
+ namespace Moses2
7
+ {
8
+
9
/** Fixed-capacity array whose storage lives in a MemPool; the logical size
 *  can be shrunk/grown via resize() but never beyond the capacity chosen at
 *  construction. The pool owns the storage, so there is no destructor. */
template<typename T>
class Array
{
public:
  typedef T* iterator;
  typedef const T* const_iterator;
  //! iterators over the logical (used) portion of the array
  const_iterator begin() const {
    return m_arr;
  }
  const_iterator end() const {
    return m_arr + m_size;
  }

  iterator begin() {
    return m_arr;
  }
  iterator end() {
    return m_arr + m_size;
  }

  /** Allocate `size` elements from `pool` and initialise each to `val`.
   *  `size` is also the maximum size for any later resize(). */
  Array(MemPool &pool, size_t size = 0, const T &val = T()) {
    m_size = size;
    m_maxSize = size;
    m_arr = pool.Allocate<T>(size);
    for (size_t i = 0; i < size; ++i) {
      m_arr[i] = val;
    }
  }

  size_t size() const {
    return m_size;
  }

  const T& operator[](size_t ind) const {
    assert(ind < m_size);
    return m_arr[ind];
  }

  T& operator[](size_t ind) {
    assert(ind < m_size);
    return m_arr[ind];
  }

  T *GetArray() {
    return m_arr;
  }

  // Hash over the used elements only.
  size_t hash() const {
    size_t seed = 0;
    for (size_t i = 0; i < m_size; ++i) {
      boost::hash_combine(seed, m_arr[i]);
    }
    return seed;
  }

  /** Byte-wise comparison of the used elements — only meaningful for
   *  trivially-copyable T.
   *  NOTE(review): reads m_size elements from `compare` without checking
   *  compare.m_size; callers are expected to compare equal-sized arrays —
   *  confirm at the call sites. */
  int Compare(const Array &compare) const {
    int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
    return cmp;
  }

  bool operator==(const Array &compare) const {
    return Compare(compare) == 0;
  }

  /** Change the logical size within the original capacity.
   *  BUGFIX: assert the NEW size against the capacity — the old code
   *  asserted `m_size <= m_maxSize` (the current size), which is trivially
   *  true and never caught an out-of-range grow. */
  void resize(size_t newSize) {
    assert(newSize <= m_maxSize);
    m_size = newSize;
  }
protected:
  size_t m_size, m_maxSize; // logical size / allocated capacity
  T *m_arr;                 // storage owned by the MemPool
};
84
+
85
+ }
mosesdecoder/moses2/EstimatedScores.h ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <iostream>
25
+ #include "legacy/Util2.h"
26
+ #include "legacy/Bitmap.h"
27
+ #include "legacy/Matrix.h"
28
+
29
+ namespace Moses2
30
+ {
31
+ class MemPool;
32
+ class System;
33
+
34
//! A square array of floats to store future costs in the phrase-based decoder
class EstimatedScores: public Matrix<float>
{
public:
  // Pool-allocated size x size matrix (one cell per source span).
  EstimatedScores(MemPool &pool, size_t size) :
    Matrix<float>(pool, size, size) {
  }

  ~EstimatedScores(); // not implemented

  // Estimated score for the words not yet covered by the bitmap
  // (presumably; implemented in the .cpp - confirm there).
  float CalcEstimatedScore(Bitmap const&) const;
  // As above, but with [startPos, endPos] treated as additionally covered.
  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;

  // Dump the whole matrix, one row per endPos, space-separated.
  std::ostream &Debug(std::ostream &out, const System &system) const {
    for (size_t endPos = 0; endPos < GetSize(); endPos++) {
      for (size_t startPos = 0; startPos < GetSize(); startPos++)
        out << GetValue(startPos, endPos) << " ";
      out << std::endl;
    }
    return out;
  }

};
57
+
58
+ }
59
+
mosesdecoder/moses2/HypothesisBase.cpp ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Hypothesis.cpp
3
+ *
4
+ * Created on: 24 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #include <boost/foreach.hpp>
9
+ #include <stdlib.h>
10
+ #include <deque>
11
+ #include "HypothesisBase.h"
12
+ #include "System.h"
13
+ #include "Scores.h"
14
+ #include "ManagerBase.h"
15
+ #include "MemPool.h"
16
+ #include "FF/StatefulFeatureFunction.h"
17
+
18
+ using namespace std;
19
+
20
+ namespace Moses2
21
+ {
22
+
23
+ //size_t g_numHypos = 0;
24
+
25
+ HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
26
+ {
27
+ m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
28
+ system.featureFunctions.GetNumScores());
29
+
30
+ // FF states
31
+ const std::vector<const StatefulFeatureFunction*> &sfffs =
32
+ system.featureFunctions.GetStatefulFeatureFunctions();
33
+ size_t numStatefulFFs = sfffs.size();
34
+ m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);
35
+
36
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
37
+ size_t statefulInd = sfff->GetStatefulInd();
38
+ FFState *state = sfff->BlankState(pool, system);
39
+ m_ffStates[statefulInd] = state;
40
+ }
41
+ }
42
+
43
+ size_t HypothesisBase::hash() const
44
+ {
45
+ return hash(0);
46
+ }
47
+
48
+ size_t HypothesisBase::hash(size_t seed) const
49
+ {
50
+ size_t numStatefulFFs =
51
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
52
+
53
+ // states
54
+ for (size_t i = 0; i < numStatefulFFs; ++i) {
55
+ const FFState *state = m_ffStates[i];
56
+ size_t hash = state->hash();
57
+ boost::hash_combine(seed, hash);
58
+ }
59
+ return seed;
60
+
61
+ }
62
+
63
+ bool HypothesisBase::operator==(const HypothesisBase &other) const
64
+ {
65
+ size_t numStatefulFFs =
66
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
67
+
68
+ // states
69
+ for (size_t i = 0; i < numStatefulFFs; ++i) {
70
+ const FFState &thisState = *m_ffStates[i];
71
+ const FFState &otherState = *other.m_ffStates[i];
72
+ if (thisState != otherState) {
73
+ return false;
74
+ }
75
+ }
76
+ return true;
77
+
78
+ }
79
+
80
+ }
81
+
mosesdecoder/moses2/HypothesisBase.h ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Hypothesis.h
3
+ *
4
+ * Created on: 24 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+
9
+ #include <iostream>
10
+ #include <cstddef>
11
+ #include "FF/FFState.h"
12
+ #include "Scores.h"
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ class ManagerBase;
18
+ class Scores;
19
+
20
// Base class for phrase-based and SCFG hypotheses. Holds the score vector
// and the per-feature FF states; hash()/operator== over those states define
// recombination equivalence. Instances are pool-allocated and recycled;
// destructors are effectively never relied upon.
class HypothesisBase
{
public:
  virtual ~HypothesisBase() {
  }

  // Manager that owns this hypothesis.
  inline ManagerBase &GetManager() const {
    return *m_mgr;
  }

  // Downcast to a concrete hypothesis type; the caller guarantees T.
  template<typename T>
  const T &Cast() const {
    return static_cast<const T&>(*this);
  }

  const Scores &GetScores() const {
    return *m_scores;
  }
  Scores &GetScores() {
    return *m_scores;
  }

  // State of the stateful feature function with stateful index 'ind'.
  const FFState *GetState(size_t ind) const {
    return m_ffStates[ind];
  }
  FFState *GetState(size_t ind) {
    return m_ffStates[ind];
  }

  // Recombination identity: hash/equality over all FF states.
  virtual size_t hash() const;
  virtual size_t hash(size_t seed) const;
  virtual bool operator==(const HypothesisBase &other) const;

  // Total score incl. estimated future cost; drives stack ordering.
  virtual SCORE GetFutureScore() const = 0;
  virtual void EvaluateWhenApplied() = 0;

  virtual std::string Debug(const System &system) const = 0;

protected:
  // NOTE(review): m_mgr is NOT set by the base constructor (see
  // HypothesisBase.cpp) - subclasses must assign it.
  ManagerBase *m_mgr;
  Scores *m_scores;     // pool-allocated score vector
  FFState **m_ffStates; // one state per stateful feature function

  HypothesisBase(MemPool &pool, const System &system);
};
65
+
66
+ ////////////////////////////////////////////////////////////////////////////////////
67
+ class HypothesisFutureScoreOrderer
68
+ {
69
+ public:
70
+ bool operator()(const HypothesisBase* a, const HypothesisBase* b) const {
71
+ return a->GetFutureScore() > b->GetFutureScore();
72
+ }
73
+ };
74
+
75
+ }
76
+
mosesdecoder/moses2/HypothesisColl.cpp ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HypothesisColl.cpp
3
+ *
4
+ * Created on: 26 Feb 2016
5
+ * Author: hieu
6
+ */
7
+ #include <iostream>
8
+ #include <sstream>
9
+ #include <algorithm>
10
+ #include <boost/foreach.hpp>
11
+ #include "HypothesisColl.h"
12
+ #include "ManagerBase.h"
13
+ #include "System.h"
14
+ #include "MemPoolAllocator.h"
15
+
16
+ using namespace std;
17
+
18
+ namespace Moses2
19
+ {
20
+
21
// Construct an empty collection whose hash set allocates from the manager's
// memory pool. The sorted cache starts empty and the beam bounds start at
// the extremes (+/- infinity) so the first Add always succeeds.
HypothesisColl::HypothesisColl(const ManagerBase &mgr)
  :m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
  ,m_sortedHypos(NULL)
{
  m_bestScore = -std::numeric_limits<float>::infinity();
  m_worstScore = std::numeric_limits<float>::infinity();
}
28
+
29
+ const HypothesisBase *HypothesisColl::GetBestHypo() const
30
+ {
31
+ if (GetSize() == 0) {
32
+ return NULL;
33
+ }
34
+ if (m_sortedHypos) {
35
+ return (*m_sortedHypos)[0];
36
+ }
37
+
38
+ SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
39
+ const HypothesisBase *bestHypo;
40
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
41
+ if (hypo->GetFutureScore() > bestScore) {
42
+ bestScore = hypo->GetFutureScore();
43
+ bestHypo = hypo;
44
+ }
45
+ }
46
+ return bestHypo;
47
+ }
48
+
49
// Add a hypothesis to the stack: pre-prune if the stack has grown too big,
// apply the beam cut-off, insert with recombination, then update the beam
// bookkeeping. Discarded hypotheses are handed to the recycler (unless the
// arc lists need them for n-best output).
void HypothesisColl::Add(
  const ManagerBase &mgr,
  HypothesisBase *hypo,
  Recycler<HypothesisBase*> &hypoRecycle,
  ArcLists &arcLists)
{
  size_t maxStackSize = mgr.system.options.search.stack_size;

  // Keep the collection from growing unboundedly between sorts.
  if (GetSize() > maxStackSize * 2) {
    //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl;
    PruneHypos(mgr, mgr.arcLists);
  }

  SCORE futureScore = hypo->GetFutureScore();

  /*
  cerr << "scores:"
  << futureScore << " "
  << m_bestScore << " "
  << GetSize() << " "
  << endl;
  */
  if (GetSize() >= maxStackSize && futureScore < m_worstScore) {
    // beam threshold or really bad hypo that won't make the pruning cut
    // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point
    //cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl;
    hypoRecycle.Recycle(hypo);
    return;
  }

  StackAdd added = Add(hypo);

  size_t nbestSize = mgr.system.options.nbest.nbest_size;
  if (nbestSize) {
    // n-best output needs losing arcs kept around rather than recycled.
    arcLists.AddArc(added.added, hypo, added.other);
  } else {
    if (added.added) {
      if (added.other) {
        // Recombined: the displaced hypothesis can be reused.
        hypoRecycle.Recycle(added.other);
      }
    } else {
      // An equal-or-better hypothesis is already stored.
      hypoRecycle.Recycle(hypo);
    }
  }

  // update beam variables
  if (added.added) {
    if (futureScore > m_bestScore) {
      m_bestScore = futureScore;
      float beamWidth = mgr.system.options.search.beam_width;
      // NOTE(review): this only raises the cut-off if beamWidth is
      // negative (a floor below the best score) - confirm the config
      // default's sign.
      if ( m_bestScore + beamWidth > m_worstScore ) {
        m_worstScore = m_bestScore + beamWidth;
      }
    } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
      m_worstScore = futureScore;
    }
  }
}
107
+
108
// Insert 'hypo' into the recombination hash.
// Returns: added - whether 'hypo' now lives in the collection;
//          other - on recombination, the hypothesis that lost (else NULL).
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
  std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
  //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl;

  // CHECK RECOMBINATION
  if (addRet.second) {
    // equiv hypo doesn't exists
    //cerr << "Added " << hypo << endl;
    return StackAdd(true, NULL);
  } else {
    HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
    //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl;

    if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
      // incoming hypo is better than the one we have
      //cerr << "Add " << hypo << "(" << hypo->hash() << ")"
      // << " discard existing " << hypoExisting << "(" << hypoExisting->hash() << ")"
      // << endl;

      // Replace the stored pointer in place via const_cast. This does not
      // break the unordered_set invariants because the two hypotheses hash
      // identically and compare equal (that is exactly why insert failed).
      const HypothesisBase * const &hypoExisting1 = *addRet.first;
      const HypothesisBase *&hypoExisting2 =
          const_cast<const HypothesisBase *&>(hypoExisting1);
      hypoExisting2 = hypo;

      return StackAdd(true, hypoExisting);
    } else {
      // already storing the best hypo. discard incoming hypo
      //cerr << "Keep existing " << hypoExisting << "(" << hypoExisting->hash() << ")"
      // << " discard new " << hypo << "(" << hypo->hash() << ")"
      // << endl;
      return StackAdd(false, hypoExisting);
    }
  }

  //assert(false);
}
145
+
146
// Lazily build (and cache in m_sortedHypos) a best-first sorted view of the
// collection, pruned to the configured stack size. Pruned hypotheses are
// recycled, and removed from the arc lists when n-best output is on. The
// cache lives until Clear().
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
  const ManagerBase &mgr,
  ArcLists &arcLists) const
{
  if (m_sortedHypos == NULL) {
    // create sortedHypos first
    MemPool &pool = mgr.GetPool();
    m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
        m_coll.size());

    SortHypos(mgr, m_sortedHypos->GetArray());

    // prune
    Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();

    size_t maxStackSize = mgr.system.options.search.stack_size;
    if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
      // Everything past position maxStackSize loses.
      for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
        HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
        recycler.Recycle(hypo);

        // delete from arclist
        if (mgr.system.options.nbest.nbest_size) {
          arcLists.Delete(hypo);
        }
      }
      m_sortedHypos->resize(maxStackSize);
    }

  }

  return *m_sortedHypos;
}
179
+
180
+ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
181
+ {
182
+ size_t maxStackSize = mgr.system.options.search.stack_size;
183
+
184
+ Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();
185
+
186
+ const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() * sizeof(const HypothesisBase *));
187
+ SortHypos(mgr, sortedHypos);
188
+
189
+ // update worse score
190
+ m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore();
191
+
192
+ // prune
193
+ for (size_t i = maxStackSize; i < GetSize(); ++i) {
194
+ HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]);
195
+
196
+ // delete from arclist
197
+ if (mgr.system.options.nbest.nbest_size) {
198
+ arcLists.Delete(hypo);
199
+ }
200
+
201
+ // delete from collection
202
+ Delete(hypo);
203
+
204
+ recycler.Recycle(hypo);
205
+ }
206
+
207
+ }
208
+
209
+ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const
210
+ {
211
+ size_t maxStackSize = mgr.system.options.search.stack_size;
212
+ //assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that
213
+ //assert(GetSize() > maxStackSize);
214
+ //assert(sortedHypos.size() == GetSize());
215
+
216
+ /*
217
+ cerr << "UNSORTED hypos: ";
218
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
219
+ cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
220
+ }
221
+ cerr << endl;
222
+ */
223
+ size_t ind = 0;
224
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
225
+ sortedHypos[ind] = hypo;
226
+ ++ind;
227
+ }
228
+
229
+ size_t indMiddle;
230
+ if (maxStackSize == 0) {
231
+ indMiddle = GetSize();
232
+ } else if (GetSize() > maxStackSize) {
233
+ indMiddle = maxStackSize;
234
+ } else {
235
+ // GetSize() <= maxStackSize
236
+ indMiddle = GetSize();
237
+ }
238
+
239
+ const HypothesisBase **iterMiddle = sortedHypos + indMiddle;
240
+
241
+ std::partial_sort(
242
+ sortedHypos,
243
+ iterMiddle,
244
+ sortedHypos + GetSize(),
245
+ HypothesisFutureScoreOrderer());
246
+
247
+ /*
248
+ cerr << "sorted hypos: ";
249
+ for (size_t i = 0; i < sortedHypos.size(); ++i) {
250
+ const HypothesisBase *hypo = sortedHypos[i];
251
+ cerr << hypo << " ";
252
+ }
253
+ cerr << endl;
254
+ */
255
+ }
256
+
257
+ void HypothesisColl::Delete(const HypothesisBase *hypo)
258
+ {
259
+ //cerr << " Delete hypo=" << hypo << "(" << hypo->hash() << ")"
260
+ // << " m_coll=" << m_coll.size() << endl;
261
+
262
+ size_t erased = m_coll.erase(hypo);
263
+ UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo);
264
+ }
265
+
266
+ void HypothesisColl::Clear()
267
+ {
268
+ m_sortedHypos = NULL;
269
+ m_coll.clear();
270
+
271
+ m_bestScore = -std::numeric_limits<float>::infinity();
272
+ m_worstScore = std::numeric_limits<float>::infinity();
273
+ }
274
+
275
+ std::string HypothesisColl::Debug(const System &system) const
276
+ {
277
+ stringstream out;
278
+ BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
279
+ out << hypo->Debug(system);
280
+ out << std::endl << std::endl;
281
+ }
282
+
283
+ return out.str();
284
+ }
285
+
286
+ } /* namespace Moses2 */
mosesdecoder/moses2/HypothesisColl.h ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HypothesisColl.h
3
+ *
4
+ * Created on: 26 Feb 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <unordered_set>
9
+ #include "HypothesisBase.h"
10
+ #include "MemPoolAllocator.h"
11
+ #include "Recycler.h"
12
+ #include "Array.h"
13
+ #include "legacy/Util2.h"
14
+
15
+ namespace Moses2
16
+ {
17
+
18
+ class ManagerBase;
19
+ class ArcLists;
20
+
21
+ typedef Array<const HypothesisBase*> Hypotheses;
22
+
23
+ ////////////////////////////////////////////////////
24
// A decoding stack: a pool-allocated hash set of hypotheses keyed on their
// FF states (recombination), with beam bookkeeping and a lazily built,
// cached best-first sorted view.
class HypothesisColl
{
public:
  HypothesisColl(const ManagerBase &mgr);

  // Add with recombination, beam pruning, and recycling of losers.
  void Add(const ManagerBase &mgr,
      HypothesisBase *hypo,
      Recycler<HypothesisBase*> &hypoRecycle,
      ArcLists &arcLists);

  size_t GetSize() const {
    return m_coll.size();
  }

  void Clear();

  // Sorted & pruned view; cached in m_sortedHypos until Clear().
  const Hypotheses &GetSortedAndPrunedHypos(
      const ManagerBase &mgr,
      ArcLists &arcLists) const;

  // Best-scoring hypothesis, or NULL when empty.
  const HypothesisBase *GetBestHypo() const;

  // Typed convenience wrapper around GetBestHypo().
  template<typename T>
  const T *GetBestHypo() const {
    const HypothesisBase *hypo = GetBestHypo();
    return hypo ? &hypo->Cast<T>() : NULL;
  }

  // Remove one hypothesis; throws if it is not in the collection.
  void Delete(const HypothesisBase *hypo);

  std::string Debug(const System &system) const;

protected:
  // Hash set using the hypotheses' hash()/operator== (via UnorderedComparer)
  // and allocating its nodes from the manager's memory pool.
  typedef std::unordered_set<const HypothesisBase*,
      UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
      MemPoolAllocator<const HypothesisBase*> > _HCType;

  _HCType m_coll;
  mutable Hypotheses *m_sortedHypos; // cache built by GetSortedAndPrunedHypos

  SCORE m_bestScore;  // best future score since the last Clear()
  SCORE m_worstScore; // current beam cut-off

  // Insert into the hash; reports recombination outcome.
  StackAdd Add(const HypothesisBase *hypo);

  void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists);
  void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const;

};
73
+
74
+ } /* namespace Moses2 */
75
+
mosesdecoder/moses2/InputPathsBase.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * InputPaths.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <vector>
11
+ #include "MemPool.h"
12
+
13
+ namespace Moses2
14
+ {
15
+
16
+ class InputType;
17
+ class System;
18
+ class ManagerBase;
19
+ class InputPathBase;
20
+
21
// Base container of InputPathBase objects for one input; concrete input
// types implement Init() to populate it. Iteration order is the order in
// which paths were added to m_inputPaths.
class InputPathsBase
{
  typedef std::vector<InputPathBase*> Coll;
public:
  InputPathsBase() {
  }
  // Virtual dtor defined in the .cpp - NOTE(review): presumably deletes
  // the contained paths; confirm there.
  virtual ~InputPathsBase();

  //! iterators
  typedef Coll::iterator iterator;
  typedef Coll::const_iterator const_iterator;

  const_iterator begin() const {
    return m_inputPaths.begin();
  }
  const_iterator end() const {
    return m_inputPaths.end();
  }

  iterator begin() {
    return m_inputPaths.begin();
  }
  iterator end() {
    return m_inputPaths.end();
  }

  // Create the paths for 'input'; implemented per concrete input type.
  virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;

protected:
  Coll m_inputPaths;
};
52
+
53
+ }
54
+
mosesdecoder/moses2/Main.cpp ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <memory>
3
+ #include <boost/pool/pool_alloc.hpp>
4
+ #include "Main.h"
5
+ #include "System.h"
6
+ #include "Phrase.h"
7
+ #include "TranslationTask.h"
8
+ #include "MemPoolAllocator.h"
9
+ #ifdef HAVE_XMLRPC_C
10
+ #include "server/Server.h"
11
+ #endif // HAVE_XMLRPC_C
12
+
13
+ #include "legacy/InputFileStream.h"
14
+ #include "legacy/Parameter.h"
15
+ #include "legacy/ThreadPool.h"
16
+ #include "legacy/Timer.h"
17
+ #include "legacy/Util2.h"
18
+ #include "util/usage.hh"
19
+
20
+ //#include <vld.h>
21
+
22
+ using namespace std;
23
+
24
+ //extern size_t g_numHypos;
25
+
26
+ int main(int argc, char** argv)
27
+ {
28
+ cerr << "Starting..." << endl;
29
+
30
+ Moses2::Timer timer;
31
+ timer.start();
32
+ //Temp();
33
+
34
+ Moses2::Parameter params;
35
+ if (!params.LoadParam(argc, argv)) {
36
+ return EXIT_FAILURE;
37
+ }
38
+ Moses2::System system(params);
39
+ timer.check("Loaded");
40
+
41
+ if (params.GetParam("show-weights")) {
42
+ return EXIT_SUCCESS;
43
+ }
44
+
45
+ //cerr << "system.numThreads=" << system.options.server.numThreads << endl;
46
+ Moses2::ThreadPool pool(system.options.server.numThreads, system.cpuAffinityOffset, system.cpuAffinityOffsetIncr);
47
+ //cerr << "CREATED POOL" << endl;
48
+
49
+ if (params.GetParam("server")) {
50
+ std::cerr << "RUN SERVER" << std::endl;
51
+ run_as_server(system);
52
+ }
53
+ else {
54
+ std::cerr << "RUN BATCH" << std::endl;
55
+ batch_run(params, system, pool);
56
+ }
57
+
58
+ cerr << "Decoding took " << timer.get_elapsed_time() << endl;
59
+ // cerr << "g_numHypos=" << g_numHypos << endl;
60
+ cerr << "Finished" << endl;
61
+ return EXIT_SUCCESS;
62
+ }
63
+
64
+ ////////////////////////////////////////////////////////////////////////////////////////////////
65
// Run the decoder as an XMLRPC server. Does not return in the server
// build; throws when Moses2 was compiled without xmlrpc-c support.
void run_as_server(Moses2::System& system)
{
#ifdef HAVE_XMLRPC_C
  Moses2::Server server(system.options.server, system);
  server.run(system); // actually: don't return. see Server::run()
#else
  UTIL_THROW2("Moses2 was compiled without xmlrpc-c. "
      << "No server functionality available.");
#endif
}
75
+
76
+ ////////////////////////////////////////////////////////////////////////////////////////////////
77
+ istream &GetInputStream(Moses2::Parameter &params)
78
+ {
79
+ const Moses2::PARAM_VEC *vec = params.GetParam("input-file");
80
+ if (vec && vec->size()) {
81
+ Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0));
82
+ return *stream;
83
+ } else {
84
+ return cin;
85
+ }
86
+ }
87
+
88
+ ////////////////////////////////////////////////////////////////////////////////////////////////
89
+
90
+ void batch_run(Moses2::Parameter& params, Moses2::System& system, Moses2::ThreadPool& pool)
91
+ {
92
+ istream& inStream = GetInputStream(params);
93
+
94
+ long translationId = 0;
95
+ string line;
96
+ while (getline(inStream, line)) {
97
+ //cerr << "line=" << line << endl;
98
+ boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
99
+
100
+ //cerr << "START pool.Submit()" << endl;
101
+ pool.Submit(task);
102
+ //task->Run();
103
+ ++translationId;
104
+ }
105
+
106
+ pool.Stop(true);
107
+
108
+ if (&inStream != &cin) {
109
+ delete& inStream;
110
+ }
111
+
112
+ //util::PrintUsage(std::cerr);
113
+
114
+ }
115
+
116
+ ////////////////////////////////////////////////////////////////////////////////////////////////
mosesdecoder/moses2/Main.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Main.h
3
+ *
4
+ * Created on: 1 Apr 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <iostream>
9
+
10
+ namespace Moses2
11
+ {
12
+ class Parameter;
13
+ class System;
14
+ class ThreadPool;
15
+ }
16
+
17
// Return cin, or a heap-allocated file stream when --input-file is set
// (the caller deletes it if it is not cin).
std::istream &GetInputStream(Moses2::Parameter &params);
// Decode every input line using the given thread pool; blocks until done.
void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::ThreadPool &pool);
// Run as an XMLRPC server (requires xmlrpc-c); does not return.
void run_as_server(Moses2::System &system);

// Scratch/debug hook - NOTE(review): only referenced commented-out in main().
void Temp();
22
+
23
+
mosesdecoder/moses2/ManagerBase.cpp ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Manager.cpp
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <boost/foreach.hpp>
8
+ #include <vector>
9
+ #include <sstream>
10
+ #include "System.h"
11
+ #include "ManagerBase.h"
12
+ #include "Phrase.h"
13
+ #include "InputPathsBase.h"
14
+ #include "InputPathBase.h"
15
+ #include "TranslationModel/PhraseTable.h"
16
+ #include "legacy/Range.h"
17
+ #include "PhraseBased/Sentence.h"
18
+
19
+ using namespace std;
20
+
21
+ namespace Moses2
22
+ {
23
// Construct a manager for one input string. The pool/recycler pointers and
// m_input stay NULL until InitPools() (and parsing) run; the destructor
// must therefore tolerate a partially initialised manager.
ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
    const std::string &inputStr, long translationId)
  :system(sys)
  ,task(task)
  ,m_inputStr(inputStr)
  ,m_translationId(translationId)
  ,m_pool(NULL)
  ,m_systemPool(NULL)
  ,m_hypoRecycler(NULL)
  ,m_input(NULL)
{
}
35
+
36
+ ManagerBase::~ManagerBase()
37
+ {
38
+ system.featureFunctions.CleanUpAfterSentenceProcessing(*m_input);
39
+
40
+ GetPool().Reset();
41
+ GetHypoRecycler().Clear();
42
+ }
43
+
44
+ void ManagerBase::InitPools()
45
+ {
46
+ m_pool = &system.GetManagerPool();
47
+ m_systemPool = &system.GetSystemPool();
48
+ m_hypoRecycler = &system.GetHypoRecycler();
49
+ //cerr << "pool size " << m_pool->Size() << " " << m_systemPool->Size() << endl;
50
+ }
51
+
52
+ }
53
+
mosesdecoder/moses2/ManagerBase.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Manager.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <queue>
11
+ #include <cstddef>
12
+ #include <string>
13
+ #include <deque>
14
+ #include "Phrase.h"
15
+ #include "MemPool.h"
16
+ #include "Recycler.h"
17
+ #include "EstimatedScores.h"
18
+ #include "ArcLists.h"
19
+ #include "legacy/Bitmaps.h"
20
+
21
+ namespace Moses2
22
+ {
23
+
24
+ class System;
25
+ class TranslationTask;
26
+ class PhraseImpl;
27
+ class SearchNormal;
28
+ class Search;
29
+ class InputType;
30
+ class OutputCollector;
31
+ class HypothesisBase;
32
+
33
// Abstract per-sentence decoding manager: owns the input, the arc lists for
// n-best extraction, and (borrowed) pools/recycler. Concrete subclasses
// implement Decode() and the Output*() methods.
class ManagerBase
{
public:
  System &system;              // global decoding system (models, options)
  const TranslationTask &task; // task this manager belongs to
  mutable ArcLists arcLists;   // recombination arcs, used for n-best output

  ManagerBase(System &sys, const TranslationTask &task,
      const std::string &inputStr, long translationId);
  virtual ~ManagerBase();
  virtual void Decode() = 0;
  virtual std::string OutputBest() const = 0;
  virtual std::string OutputNBest() = 0;
  virtual std::string OutputTransOpt() = 0;

  // Per-sentence pool. Valid only after InitPools() has been called.
  MemPool &GetPool() const {
    return *m_pool;
  }

  // Longer-lived system pool. Valid only after InitPools().
  MemPool &GetSystemPool() const {
    return *m_systemPool;
  }

  // Recycler for discarded hypotheses. Valid only after InitPools().
  Recycler<HypothesisBase*> &GetHypoRecycler() const {
    return *m_hypoRecycler;
  }

  const InputType &GetInput() const {
    return *m_input;
  }

  long GetTranslationId() const {
    return m_translationId;
  }

protected:
  std::string m_inputStr; // raw input line
  long m_translationId;
  InputType *m_input;     // parsed input; NULL until set by a subclass

  // Borrowed from System by InitPools(); NULL before that.
  mutable MemPool *m_pool, *m_systemPool;
  mutable Recycler<HypothesisBase*> *m_hypoRecycler;

  void InitPools();

};
79
+
80
+ }
81
+
mosesdecoder/moses2/MemPool.h ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * MemPool.h
3
+ *
4
+ * Created on: 28 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <algorithm>
11
+ #include <iostream>
12
+ #include <vector>
13
+ #include <stdint.h>
14
+ #include <stdlib.h>
15
+ #include <limits>
16
+ #include <iostream>
17
+
18
+ namespace Moses2
19
+ {
20
+
21
// Bump allocator over a growing list of pages. Allocation is pointer
// arithmetic; nothing is freed individually - Reset() reclaims every page
// for reuse and the destructor releases them. No constructors/destructors
// are run for allocated objects (callers use placement new when needed).
class MemPool
{
  // One contiguous allocation: [mem, end), 'size' bytes.
  struct Page {
    uint8_t *mem;
    uint8_t *end;
    size_t size;

    Page() = delete;
    Page(std::size_t size);
    ~Page();
  };

public:
  MemPool(std::size_t initSize = 10240);

  ~MemPool();

  // Hand out 'size' raw bytes, growing the pool when the page is full.
  uint8_t* Allocate(std::size_t size);

  // Space for a single T. No constructor is run.
  template<typename T>
  T *Allocate() {
    uint8_t *ret = Allocate(sizeof(T));
    return (T*) ret;
  }

  // Space for 'num' elements of T, with the per-element byte count rounded
  // up to a multiple of 16 before multiplying (over-provisioning; callers
  // still index with the natural sizeof(T) stride). The previous rounding,
  // size += size % 16, only produced a multiple of 16 when sizeof(T) was a
  // multiple of 8 (e.g. sizeof(T)==4 gave 8).
  template<typename T>
  T *Allocate(size_t num) {
    size_t size = sizeof(T);
    size_t m = size % 16;
    if (m) {
      size += 16 - m; // round up to the next multiple of 16
    }

    uint8_t *ret = Allocate(size * num);
    return (T*) ret;
  }

  // re-use pool: rewind to the first page without freeing anything
  void Reset();

  size_t Size();

private:
  // Grow: add/advance to a page big enough for 'size' bytes.
  uint8_t *More(std::size_t size);

  std::vector<Page*> m_pages;

  size_t m_currSize;  // bookkeeping for the current allocation position
  size_t m_currPage;  // index of the page currently being filled
  uint8_t *current_;  // next free byte in the current page

  // no copying
  MemPool(const MemPool &) = delete;
  MemPool &operator=(const MemPool &) = delete;
};
74
+
75
+
76
+ }
77
+
mosesdecoder/moses2/MemPoolAllocator.h ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include "MemPool.h"
3
+
4
+ namespace Moses2
5
+ {
6
+
7
// Minimal STL-style allocator that hands out memory from a MemPool.
// deallocate() and destroy() are deliberately no-ops: the pool reclaims all
// memory wholesale via MemPool::Reset(), and element destructors are never
// run - only use with types whose destructors may safely be skipped.
template<typename T>
class MemPoolAllocator
{
public:
  typedef T value_type;
  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;

  template<class U>
  struct rebind {
    typedef MemPoolAllocator<U> other;
  };

  MemPoolAllocator(Moses2::MemPool &pool) :
    m_pool(pool) {
  }
  MemPoolAllocator(const MemPoolAllocator &other) :
    m_pool(other.m_pool) {
  }

  // Rebinding copy: a rebound allocator shares the same pool.
  template<class U>
  MemPoolAllocator(const MemPoolAllocator<U>& other) :
    m_pool(other.m_pool) {
  }

  size_type max_size() const {
    return std::numeric_limits<size_type>::max();
  }

  // No-op: pool memory is reclaimed wholesale by MemPool::Reset().
  void deallocate(pointer p, size_type n) {
    //std::cerr << "deallocate " << p << " " << n << std::endl;
  }

  // NOTE(review): std::allocator<void>::const_pointer was removed in
  // C++20; fine for the standard this codebase targets.
  pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
    //std::cerr << "allocate " << n << " " << hint << std::endl;
    pointer ret = m_pool.Allocate<T>(n);
    return ret;
  }

  void construct(pointer p, const_reference val) {
    //std::cerr << "construct " << p << " " << n << std::endl;
    new ((void *) p) T(val);
  }

  // No-op by design: destructors are intentionally never run.
  void destroy(pointer p) {
    //std::cerr << "destroy " << p << " " << n << std::endl;
  }

  // return address of values
  pointer address (reference value) const {
    return &value;
  }
  const_pointer address (const_reference value) const {
    return &value;
  }

  // All instances are interchangeable for deallocation purposes (it is a
  // no-op), so every allocator compares equal.
  bool operator==(const MemPoolAllocator<T> &allocator) const {
    return true;
  }

  bool operator!=(const MemPoolAllocator<T> &allocator) const {
    return false;
  }

  // No-op: the pool reference cannot be reseated.
  MemPoolAllocator<T>& operator=(const MemPoolAllocator<T>& allocator) {
    return *this;
  }

  MemPool &m_pool;
protected:
};
82
+
83
+ }
84
+
85
+
mosesdecoder/moses2/Moses2Wrapper.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <string>
3
+
4
+ namespace Moses2 {
5
+ class Parameter;
6
+ class System;
7
extern "C" {
  // Status codes returned across the C API boundary.
  enum MosesApiErrorCode {
    MS_API_OK,
    MS_API_E_FAILURE,
    MS_API_E_INPUT,
    MS_API_E_TIMEOUT
  };
}
// Thin C++ facade owning one Parameter/System pair, used by the C API.
class Moses2Wrapper
{
  Parameter* m_param;
  System* m_system;

public:
  Moses2Wrapper(const std::string& filePath);
  ~Moses2Wrapper();
  // Translate 'input' (id tags the request); returns n-best output when
  // 'nbest' is set, otherwise the single best translation.
  std::string Translate(const std::string& input, long id, bool nbest);
  void UpdateLMPath(const std::string& filePath);

  // Helpers for passing strings across the C boundary. NOTE(review):
  // CopyString presumably heap-allocates and Free releases - confirm in
  // the .cpp before relying on the allocator used.
  static char* CopyString(const char* str);
  static void Free(void* ptr);
};
29
+
30
+ }
mosesdecoder/moses2/Phrase.h ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhraseImpl.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstddef>
11
+ #include <string>
12
+ #include <sstream>
13
+ #include <iostream>
14
+ #include "Word.h"
15
+ #include "MemPool.h"
16
+ #include "TypeDef.h"
17
+ #include "legacy/FactorCollection.h"
18
+ #include "SCFG/Word.h"
19
+ #include <boost/functional/hash.hpp>
20
+
21
+ namespace Moses2
22
+ {
23
+
24
+ template<typename WORD>
25
+ class SubPhrase;
26
+
27
+ class Scores;
28
+ class PhraseTable;
29
+ class MemPool;
30
+ class System;
31
+
32
+ template<typename WORD>
33
+ class Phrase
34
+ {
35
+ public:
36
+ virtual ~Phrase() {
37
+ }
38
+ virtual const WORD& operator[](size_t pos) const = 0;
39
+ virtual size_t GetSize() const = 0;
40
+
41
+ virtual const WORD& Back() const {
42
+ assert(GetSize());
43
+ return (*this)[GetSize() - 1];
44
+ }
45
+
46
+ virtual size_t hash() const {
47
+ size_t seed = 0;
48
+
49
+ for (size_t i = 0; i < GetSize(); ++i) {
50
+ const WORD &word = (*this)[i];
51
+ size_t wordHash = word.hash();
52
+ boost::hash_combine(seed, wordHash);
53
+ }
54
+
55
+ return seed;
56
+ }
57
+
58
+ virtual bool operator==(const Phrase &compare) const {
59
+ if (GetSize() != compare.GetSize()) {
60
+ return false;
61
+ }
62
+
63
+ for (size_t i = 0; i < GetSize(); ++i) {
64
+ const WORD &word = (*this)[i];
65
+ const WORD &otherWord = compare[i];
66
+ if (word != otherWord) {
67
+ return false;
68
+ }
69
+ }
70
+
71
+ return true;
72
+ }
73
+
74
+ virtual bool operator!=(const Phrase &compare) const {
75
+ return !((*this) == compare);
76
+ }
77
+
78
+ virtual std::string GetString(const FactorList &factorTypes) const {
79
+ if (GetSize() == 0) {
80
+ return "";
81
+ }
82
+
83
+ std::stringstream ret;
84
+
85
+ const WORD &word = (*this)[0];
86
+ ret << word.GetString(factorTypes);
87
+ for (size_t i = 1; i < GetSize(); ++i) {
88
+ const WORD &word = (*this)[i];
89
+ ret << " " << word.GetString(factorTypes);
90
+ }
91
+ return ret.str();
92
+ }
93
+
94
+ virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
95
+
96
+ virtual std::string Debug(const System &system) const {
97
+ std::stringstream out;
98
+ size_t size = GetSize();
99
+ if (size) {
100
+ out << (*this)[0].Debug(system);
101
+ for (size_t i = 1; i < size; ++i) {
102
+ const WORD &word = (*this)[i];
103
+ out << " " << word.Debug(system);
104
+ }
105
+ }
106
+
107
+ return out.str();
108
+ }
109
+
110
+ virtual void OutputToStream(const System &system, std::ostream &out) const {
111
+ size_t size = GetSize();
112
+ if (size) {
113
+ (*this)[0].OutputToStream(system, out);
114
+ for (size_t i = 1; i < size; ++i) {
115
+ const WORD &word = (*this)[i];
116
+ out << " ";
117
+ word.OutputToStream(system, out);
118
+ }
119
+ }
120
+ }
121
+
122
+
123
+ };
124
+
125
+ ////////////////////////////////////////////////////////////////////////
126
+ template<typename WORD>
127
+ class PhraseOrdererLexical
128
+ {
129
+ public:
130
+ bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const {
131
+ size_t minSize = std::min(a.GetSize(), b.GetSize());
132
+ for (size_t i = 0; i < minSize; ++i) {
133
+ const Word &aWord = a[i];
134
+ const Word &bWord = b[i];
135
+ int cmp = aWord.Compare(bWord);
136
+ //std::cerr << "WORD: " << aWord << " ||| " << bWord << " ||| " << lessThan << std::endl;
137
+ if (cmp) {
138
+ return (cmp < 0);
139
+ }
140
+ }
141
+ return a.GetSize() < b.GetSize();
142
+ }
143
+ };
144
+
145
+ }
146
+
mosesdecoder/moses2/Recycler.cpp ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Recycler.cpp
 *
 * Created on: 2 Jan 2016
 * Author: hieu
 */

#include "Recycler.h"

// Recycler's implementation presumably lives entirely in the header
// (confirm in Recycler.h); this translation unit intentionally defines
// nothing and exists only so the build has a .cpp for the component.
namespace Moses2
{

} /* namespace Moses2 */
mosesdecoder/moses2/Scores.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Scores.h
 *
 * Created on: 23 Oct 2015
 * Author: hieu
 */

#pragma once
#include <iostream>
#include <string>
#include "TypeDef.h"
#include "MemPool.h"
// NOTE(review): std::vector is used below but <vector> is only reached
// transitively through TypeDef.h — confirm before removing that include.

namespace Moses2
{

class FeatureFunction;
class FeatureFunctions;
class System;

// Flat per-feature score vector for a hypothesis/phrase, allocated from a
// MemPool, plus a cached weighted total score.
class Scores
{
public:
  // Construct with numScores score slots.
  Scores(const System &system, MemPool &pool, size_t numScores);
  // Construct with numScores slots, copying from origScores.
  Scores(const System &system, MemPool &pool, size_t numScores,
      const Scores &origScores);

  virtual ~Scores();

  // Cached weighted total over all features.
  SCORE GetTotalScore() const {
    return m_total;
  }

  // Scores belonging to the given feature function (pointer into m_scores).
  const SCORE *GetScores(const FeatureFunction &featureFunction) const;

  // Reset all scores (and the total) — see .cpp for exact semantics.
  void Reset(const System &system);

  // Parse scores for one feature from str; transformScores presumably
  // applies the usual log transform — confirm in the .cpp.
  void CreateFromString(const std::string &str,
      const FeatureFunction &featureFunction, const System &system,
      bool transformScores);

  // PlusEquals overloads: accumulate score(s) into the feature's slot(s).
  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score);

  // Accumulate into the slot at the given offset within the feature.
  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score, size_t offset);

  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const std::vector<SCORE> &scores);

  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      SCORE scores[]);

  // Accumulate another full score vector.
  void PlusEquals(const System &system, const Scores &scores);

  // Subtract another full score vector.
  void MinusEquals(const System &system, const Scores &scores);

  // Assign overloads: overwrite rather than accumulate.
  void Assign(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score);

  void Assign(const System &system, const FeatureFunction &featureFunction,
      const std::vector<SCORE> &scores);

  std::string Debug(const System &system) const;

  // Write the per-feature score breakdown to out.
  void OutputBreakdown(std::ostream &out, const System &system) const;

  // static functions to work out estimated scores
  static SCORE CalcWeightedScore(const System &system,
      const FeatureFunction &featureFunction, SCORE scores[]);

  static SCORE CalcWeightedScore(const System &system,
      const FeatureFunction &featureFunction, SCORE score);

protected:
  SCORE *m_scores;  // per-feature score slots (pool-allocated)
  SCORE m_total;    // cached weighted total
};

}
81
+
mosesdecoder/moses2/SubPhrase.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <sstream>
3
+ #include "Phrase.h"
4
+ #include "Word.h"
5
+ #include "SCFG/Word.h"
6
+
7
+ namespace Moses2
8
+ {
9
+ class System;
10
+
11
+ template<typename WORD>
12
+ class SubPhrase: public Phrase<WORD>
13
+ {
14
+ public:
15
+ SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
16
+ :m_origPhrase(&origPhrase)
17
+ ,m_start(start)
18
+ ,m_size(size)
19
+ {}
20
+
21
+ virtual const WORD& operator[](size_t pos) const {
22
+ return (*m_origPhrase)[pos + m_start];
23
+ }
24
+
25
+ virtual size_t GetSize() const {
26
+ return m_size;
27
+ }
28
+
29
+ SubPhrase GetSubPhrase(size_t start, size_t size) const {
30
+ SubPhrase ret(*m_origPhrase, m_start + start, size);
31
+ return ret;
32
+ }
33
+
34
+ virtual std::string Debug(const System &system) const {
35
+ std::stringstream out;
36
+ if (GetSize()) {
37
+ out << (*this)[0].Debug(system);
38
+ for (size_t i = 1; i < GetSize(); ++i) {
39
+ const WORD &word = (*this)[i];
40
+ out << " " << word.Debug(system);
41
+ }
42
+ }
43
+
44
+ return out.str();
45
+ }
46
+
47
+ protected:
48
+ const Phrase<WORD> *m_origPhrase;
49
+ size_t m_start, m_size;
50
+ };
51
+
52
+
53
+ }
54
+
mosesdecoder/moses2/TargetPhrase.cpp ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * TargetPhrase.cpp
 *
 * Created on: 26 Apr 2016
 * Author: hieu
 */

#include "TargetPhrase.h"
#include "System.h"
#include "Scores.h"

// TargetPhrase's implementation presumably lives in the header (confirm
// in TargetPhrase.h); this translation unit intentionally defines nothing
// and exists only so the build has a .cpp for the component.
namespace Moses2
{

} /* namespace Moses2 */
mosesdecoder/moses2/TranslationTask.cpp ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "TranslationTask.h"
2
+ #include "System.h"
3
+ #include "InputType.h"
4
+ #include "PhraseBased/Manager.h"
5
+ #include "SCFG/Manager.h"
6
+
7
+ using namespace std;
8
+
9
+ namespace Moses2
10
+ {
11
+
12
+ TranslationTask::TranslationTask(System &system,
13
+ const std::string &line,
14
+ long translationId)
15
+ {
16
+ if (system.isPb) {
17
+ m_mgr = new Manager(system, *this, line, translationId);
18
+ } else {
19
+ m_mgr = new SCFG::Manager(system, *this, line, translationId);
20
+ }
21
+ }
22
+
23
+ TranslationTask::~TranslationTask()
24
+ {
25
+ }
26
+
27
+ std::string TranslationTask::ReturnTranslation(bool nbest) const
28
+ {
29
+ m_mgr->Decode();
30
+ string out;
31
+ if (nbest) {
32
+ out = m_mgr->OutputNBest() + "\n";
33
+ }
34
+ else {
35
+ out = m_mgr->OutputBest() + "\n";
36
+ }
37
+ delete m_mgr;
38
+ return out;
39
+ }
40
+
41
+ void TranslationTask::Run()
42
+ {
43
+
44
+ m_mgr->Decode();
45
+
46
+ string out;
47
+
48
+ out = m_mgr->OutputBest() + "\n";
49
+ m_mgr->system.bestCollector->Write(m_mgr->GetTranslationId(), out);
50
+
51
+ if (m_mgr->system.options.nbest.nbest_size) {
52
+ out = m_mgr->OutputNBest();
53
+ m_mgr->system.nbestCollector->Write(m_mgr->GetTranslationId(), out);
54
+ }
55
+
56
+ if (!m_mgr->system.options.output.detailed_transrep_filepath.empty()) {
57
+ out = m_mgr->OutputTransOpt();
58
+ m_mgr->system.detailedTranslationCollector->Write(m_mgr->GetTranslationId(), out);
59
+ }
60
+
61
+ delete m_mgr;
62
+ }
63
+
64
+ }
65
+
mosesdecoder/moses2/TrellisPaths.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * TrellisPaths.h
3
+ *
4
+ * Created on: 16 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+
9
+ #include <vector>
10
+ #include <queue>
11
+ #include "PhraseBased/TrellisPath.h"
12
+
13
+ namespace Moses2
14
+ {
15
+
16
+ template<typename T>
17
+ struct CompareTrellisPath {
18
+ bool operator()(const T* pathA, const T* pathB) const {
19
+ return (pathA->GetFutureScore() < pathB->GetFutureScore());
20
+ }
21
+ };
22
+
23
+ template<typename T>
24
+ class TrellisPaths
25
+ {
26
+ public:
27
+ TrellisPaths() {}
28
+
29
+ virtual ~TrellisPaths() {
30
+ while (!empty()) {
31
+ T *path = Get();
32
+ delete path;
33
+ }
34
+ }
35
+
36
+ bool empty() const {
37
+ return m_coll.empty();
38
+ }
39
+
40
+ //! add a new entry into collection
41
+ void Add(T *trellisPath) {
42
+ m_coll.push(trellisPath);
43
+ }
44
+
45
+ T *Get() {
46
+ T *top = m_coll.top();
47
+
48
+ // Detach
49
+ m_coll.pop();
50
+ return top;
51
+ }
52
+
53
+ size_t GetSize() const {
54
+ return m_coll.size();
55
+ }
56
+
57
+ protected:
58
+ typedef std::priority_queue<T*, std::vector<T*>,
59
+ CompareTrellisPath<T> > CollectionType;
60
+ CollectionType m_coll;
61
+ };
62
+
63
+ } /* namespace Moses2 */
64
+
mosesdecoder/moses2/TypeDef.h ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * TypeDef.h
 *
 * Created on: 23 Oct 2015
 * Author: hieu
 */
#pragma once

#include <cstddef>
#include <vector>
#include <istream>
#include "Vector.h"

namespace Moses2
{

class HypothesisBase;

// Sentinel for "index not found".
// NOTE(review): relies on <limits> being reached transitively at each
// point of use — confirm before adding new uses.
#define NOT_FOUND std::numeric_limits<size_t>::max()
// Decoder-wide defaults; presumably overridable via configuration —
// confirm against the Parameter/options handling.
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
const size_t DEFAULT_MAX_CHART_SPAN = 20;
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;

const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
// Floor value for scores.
const float LOWEST_SCORE = -100.0f;

const float DEFAULT_BEAM_WIDTH = 0.00001f;
const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;

#ifndef BOS_
#define BOS_ "<s>" //Beginning of sentence symbol
#endif
#ifndef EOS_
#define EOS_ "</s>" //End of sentence symbol
#endif

// Index of a factor within a word.
typedef size_t FactorType;
// Scalar score type used throughout the decoder.
typedef float SCORE;
typedef std::vector<FactorType> FactorList;

// Note: StaticData uses SearchAlgorithm to determine whether the translation
// model is phrase-based or syntax-based. If you add a syntax-based search
// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
  Normal = 0, CubePruning = 1,
  //,CubeGrowing = 2
  CYKPlus = 3,
  NormalBatch = 4,
  ChartIncremental = 5,
  SyntaxS2T = 6,
  SyntaxT2S = 7,
  SyntaxT2S_SCFG = 8,
  SyntaxF2S = 9,
  CubePruningPerMiniStack = 10,
  CubePruningPerBitmap = 11,
  CubePruningCardinalStack = 12,
  CubePruningBitmapStack = 13,
  CubePruningMiniStack = 14,
  DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};

// Kind of input the decoder consumes. Numeric values are part of the
// configuration format — do not renumber.
enum InputTypeEnum {
  SentenceInput = 0,
  ConfusionNetworkInput = 1,
  WordLatticeInput = 2,
  TreeInputType = 3,
  //,WordLatticeInput2 = 4,
  TabbedSentenceInput = 5,
  ForestInputType = 6,
  SentenceInputWithCandidates = 7,
};

// How inline XML markup in the input is treated.
enum XmlInputType {
  XmlPassThrough = 0,
  XmlIgnore = 1,
  XmlExclusive = 2,
  XmlInclusive = 3,
  XmlConstraint = 4
};

// Whether/how word alignments are sorted on output.
enum WordAlignmentSort {
  NoSort = 0,
  TargetOrder = 1
};

// Parsing algorithm for string-to-tree decoding.
enum S2TParsingAlgorithm {
  RecursiveCYKPlus,
  Scope3
};

// Policy for overlapping source labels.
enum SourceLabelOverlap {
  SourceLabelOverlapAdd = 0,
  SourceLabelOverlapReplace = 1,
  SourceLabelOverlapDiscard = 2
};

/////////////////////////
// MOSES2 only
104
+
105
+ class StackAdd
106
+ {
107
+ public:
108
+ bool added;
109
+ HypothesisBase *other;
110
+
111
+ StackAdd() {
112
+ }
113
+ StackAdd(bool vadded, HypothesisBase *vOther) :
114
+ added(vadded), other(vOther) {
115
+ }
116
+ };
117
+
118
class Hypothesis;
// A Vector of hypotheses processed together as one batch.
typedef Vector<Hypothesis*> Batch;

class Factor;
// A sequence of factors forming a context — presumably preceding words
// for LM-style scoring; confirm against usage.
typedef std::vector<const Factor*> Context;

}
125
+
mosesdecoder/moses2/Vector.cpp ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Vector.cpp
 *
 * Created on: 7 Dec 2015
 * Author: hieu
 */

#include "Vector.h"

// Vector is presumably a header-only template (confirm in Vector.h);
// this translation unit intentionally defines nothing and exists only so
// the build has a .cpp for the component.
namespace Moses2
{

}
14
+