sleepyhead111 commited on
Commit
36ceee4
·
verified ·
1 Parent(s): cd33cd3

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. fairseq-0.10.2/fairseq_cli/preprocess.py +398 -0
  2. fairseq-0.10.2/fairseq_cli/score.py +96 -0
  3. fairseq-0.10.2/tests/test_backtranslation_dataset.py +123 -0
  4. fairseq-0.10.2/tests/test_constraints.py +269 -0
  5. fairseq-0.10.2/tests/test_convtbc.py +54 -0
  6. fairseq-0.10.2/tests/test_inference_dropout.py +66 -0
  7. fairseq-0.10.2/tests/test_metrics.py +77 -0
  8. fairseq-0.10.2/tests/test_sequence_scorer.py +120 -0
  9. fairseq-0.10.2/tests/test_token_block_dataset.py +79 -0
  10. mosesdecoder/.beautify-ignore +38 -0
  11. mosesdecoder/.travis.yml +24 -0
  12. mosesdecoder/azure-pipelines.yml +100 -0
  13. mosesdecoder/biconcor/Jamfile +2 -0
  14. mosesdecoder/biconcor/PhrasePairCollection.cpp +209 -0
  15. mosesdecoder/biconcor/PhrasePairCollection.h +46 -0
  16. mosesdecoder/biconcor/SuffixArray.h +82 -0
  17. mosesdecoder/biconcor/biconcor.cpp +171 -0
  18. mosesdecoder/chk.tmp +1 -0
  19. mosesdecoder/doxygen.conf +1781 -0
  20. mosesdecoder/moses-cmd/Jamfile +7 -0
  21. mosesdecoder/moses-cmd/LatticeMBRGrid.cpp +215 -0
  22. mosesdecoder/moses-cmd/Main.cpp +33 -0
  23. mosesdecoder/moses-cmd/MainVW.cpp +186 -0
  24. mosesdecoder/moses-cmd/MainVW.h +42 -0
  25. mosesdecoder/moses2/AlignmentInfo.h +148 -0
  26. mosesdecoder/moses2/AlignmentInfoCollection.cpp +62 -0
  27. mosesdecoder/moses2/ArcLists.cpp +127 -0
  28. mosesdecoder/moses2/Array.h +85 -0
  29. mosesdecoder/moses2/EstimatedScores.h +59 -0
  30. mosesdecoder/moses2/HypothesisBase.cpp +81 -0
  31. mosesdecoder/moses2/HypothesisBase.h +76 -0
  32. mosesdecoder/moses2/HypothesisColl.cpp +286 -0
  33. mosesdecoder/moses2/HypothesisColl.h +75 -0
  34. mosesdecoder/moses2/InputPathsBase.h +54 -0
  35. mosesdecoder/moses2/Main.cpp +116 -0
  36. mosesdecoder/moses2/Main.h +23 -0
  37. mosesdecoder/moses2/ManagerBase.cpp +53 -0
  38. mosesdecoder/moses2/ManagerBase.h +81 -0
  39. mosesdecoder/moses2/MemPool.h +77 -0
  40. mosesdecoder/moses2/MemPoolAllocator.h +85 -0
  41. mosesdecoder/moses2/Moses2Wrapper.h +30 -0
  42. mosesdecoder/moses2/Phrase.h +146 -0
  43. mosesdecoder/moses2/Recycler.cpp +13 -0
  44. mosesdecoder/moses2/Scores.h +81 -0
  45. mosesdecoder/moses2/SubPhrase.h +54 -0
  46. mosesdecoder/moses2/TargetPhrase.cpp +15 -0
  47. mosesdecoder/moses2/TranslationTask.cpp +65 -0
  48. mosesdecoder/moses2/TrellisPaths.h +64 -0
  49. mosesdecoder/moses2/TypeDef.h +125 -0
  50. mosesdecoder/moses2/Vector.cpp +14 -0
fairseq-0.10.2/fairseq_cli/preprocess.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ #
4
+ # This source code is licensed under the MIT license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ """
7
+ Data pre-processing: build vocabularies and binarize training data.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import shutil
13
+ import sys
14
+ from collections import Counter
15
+ from itertools import zip_longest
16
+ from multiprocessing import Pool
17
+
18
+ from fairseq import options, tasks, utils
19
+ from fairseq.binarizer import Binarizer
20
+ from fairseq.data import indexed_dataset
21
+
22
+
23
+ logging.basicConfig(
24
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
25
+ datefmt="%Y-%m-%d %H:%M:%S",
26
+ level=os.environ.get("LOGLEVEL", "INFO").upper(),
27
+ stream=sys.stdout,
28
+ )
29
+ logger = logging.getLogger("fairseq_cli.preprocess")
30
+
31
+
32
def main(args):
    """Build vocabularies and binarize train/valid/test data.

    Reads plain-text (parallel, or monolingual with --only-source) corpora,
    builds or loads the source/target dictionaries, and writes binarized
    datasets plus the dictionaries (and optionally binarized alignments and a
    best-alignment word dictionary) into ``args.destdir``.
    """
    utils.import_user_module(args)

    os.makedirs(args.destdir, exist_ok=True)

    # Mirror all log output into <destdir>/preprocess.log.
    logger.addHandler(
        logging.FileHandler(
            filename=os.path.join(args.destdir, "preprocess.log"),
        )
    )
    logger.info(args)

    task = tasks.get_task(args.task)

    def train_path(lang):
        # e.g. trainpref "train" + lang "en" -> "train.en"; no suffix when lang is falsy.
        return "{}{}".format(args.trainpref, ("." + lang) if lang else "")

    def file_name(prefix, lang):
        fname = prefix
        if lang is not None:
            fname += ".{lang}".format(lang=lang)
        return fname

    def dest_path(prefix, lang):
        return os.path.join(args.destdir, file_name(prefix, lang))

    def dict_path(lang):
        return dest_path("dict", lang) + ".txt"

    def build_dictionary(filenames, src=False, tgt=False):
        # Exactly one of src/tgt must be set; it selects the matching
        # threshold/nwords options.
        assert src ^ tgt
        return task.build_dictionary(
            filenames,
            workers=args.workers,
            threshold=args.thresholdsrc if src else args.thresholdtgt,
            nwords=args.nwordssrc if src else args.nwordstgt,
            padding_factor=args.padding_factor,
        )

    target = not args.only_source

    # Refuse to clobber dictionaries already in destdir unless one was
    # supplied explicitly on the command line.
    if not args.srcdict and os.path.exists(dict_path(args.source_lang)):
        raise FileExistsError(dict_path(args.source_lang))
    if target and not args.tgtdict and os.path.exists(dict_path(args.target_lang)):
        raise FileExistsError(dict_path(args.target_lang))

    if args.joined_dictionary:
        assert (
            not args.srcdict or not args.tgtdict
        ), "cannot use both --srcdict and --tgtdict with --joined-dictionary"

        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        elif args.tgtdict:
            src_dict = task.load_dictionary(args.tgtdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            # One shared dictionary built over both sides' training data.
            src_dict = build_dictionary(
                {train_path(lang) for lang in [args.source_lang, args.target_lang]},
                src=True,
            )
        tgt_dict = src_dict
    else:
        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            src_dict = build_dictionary([train_path(args.source_lang)], src=True)

        if target:
            if args.tgtdict:
                tgt_dict = task.load_dictionary(args.tgtdict)
            else:
                assert (
                    args.trainpref
                ), "--trainpref must be set if --tgtdict is not specified"
                tgt_dict = build_dictionary([train_path(args.target_lang)], tgt=True)
        else:
            tgt_dict = None

    src_dict.save(dict_path(args.source_lang))
    if target and tgt_dict is not None:
        tgt_dict.save(dict_path(args.target_lang))

    def make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers):
        """Binarize one split of one language, fanning chunks out to workers."""
        logger.info("[{}] Dictionary: {} types".format(lang, len(vocab)))
        n_seq_tok = [0, 0]  # [num sequences, num tokens]; mutated by merge_result
        replaced = Counter()  # tokens replaced by <unk>, keyed by surface form

        def merge_result(worker_result):
            replaced.update(worker_result["replaced"])
            n_seq_tok[0] += worker_result["nseq"]
            n_seq_tok[1] += worker_result["ntok"]

        input_file = "{}{}".format(
            input_prefix, ("." + lang) if lang is not None else ""
        )
        # Byte offsets that split the input into num_workers chunks.
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            # Workers 1..N-1 binarize their chunk into temp shards; this
            # process binarizes chunk 0 inline below.
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize,
                    (
                        args,
                        input_file,
                        vocab,
                        prefix,
                        lang,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, lang, "bin"),
            impl=args.dataset_impl,
            vocab_size=len(vocab),
        )
        merge_result(
            Binarizer.binarize(
                input_file, vocab, lambda t: ds.add_item(t), offset=0, end=offsets[1]
            )
        )
        if num_workers > 1:
            pool.join()
            # Append each worker's temp shard in order, then delete it.
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, lang)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))

        logger.info(
            "[{}] {}: {} sents, {} tokens, {:.3}% replaced by {}".format(
                lang,
                input_file,
                n_seq_tok[0],
                n_seq_tok[1],
                100 * sum(replaced.values()) / n_seq_tok[1],
                vocab.unk_word,
            )
        )

    def make_binary_alignment_dataset(input_prefix, output_prefix, num_workers):
        """Binarize one alignment file (same fan-out scheme as above)."""
        nseq = [0]

        def merge_result(worker_result):
            nseq[0] += worker_result["nseq"]

        input_file = input_prefix
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize_alignments,
                    (
                        args,
                        input_file,
                        utils.parse_alignment,
                        prefix,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, None, "bin"), impl=args.dataset_impl
        )

        merge_result(
            Binarizer.binarize_alignments(
                input_file,
                utils.parse_alignment,
                lambda t: ds.add_item(t),
                offset=0,
                end=offsets[1],
            )
        )
        if num_workers > 1:
            pool.join()
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, None)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, None, "idx"))

        logger.info("[alignments] {}: parsed {} alignments".format(input_file, nseq[0]))

    def make_dataset(vocab, input_prefix, output_prefix, lang, num_workers=1):
        if args.dataset_impl == "raw":
            # Copy original text file to destination folder
            output_text_file = dest_path(
                output_prefix + ".{}-{}".format(args.source_lang, args.target_lang),
                lang,
            )
            shutil.copyfile(file_name(input_prefix, lang), output_text_file)
        else:
            make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers)

    def make_all(lang, vocab):
        # Process train plus any comma-separated list of valid/test prefixes;
        # extra prefixes become valid1/valid2/... and test1/test2/...
        if args.trainpref:
            make_dataset(vocab, args.trainpref, "train", lang, num_workers=args.workers)
        if args.validpref:
            for k, validpref in enumerate(args.validpref.split(",")):
                outprefix = "valid{}".format(k) if k > 0 else "valid"
                make_dataset(
                    vocab, validpref, outprefix, lang, num_workers=args.workers
                )
        if args.testpref:
            for k, testpref in enumerate(args.testpref.split(",")):
                outprefix = "test{}".format(k) if k > 0 else "test"
                make_dataset(vocab, testpref, outprefix, lang, num_workers=args.workers)

    def make_all_alignments():
        # Alignment files are optional: only binarized when present on disk.
        if args.trainpref and os.path.exists(args.trainpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.trainpref + "." + args.align_suffix,
                "train.align",
                num_workers=args.workers,
            )
        if args.validpref and os.path.exists(args.validpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.validpref + "." + args.align_suffix,
                "valid.align",
                num_workers=args.workers,
            )
        if args.testpref and os.path.exists(args.testpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.testpref + "." + args.align_suffix,
                "test.align",
                num_workers=args.workers,
            )

    make_all(args.source_lang, src_dict)
    if target:
        make_all(args.target_lang, tgt_dict)
    if args.align_suffix:
        make_all_alignments()

    logger.info("Wrote preprocessed data to {}".format(args.destdir))

    if args.alignfile:
        # Build a word-alignment dictionary: for each source word, the target
        # word it is most frequently aligned to in the training data.
        assert args.trainpref, "--trainpref must be set if --alignfile is specified"
        src_file_name = train_path(args.source_lang)
        tgt_file_name = train_path(args.target_lang)
        freq_map = {}
        with open(args.alignfile, "r", encoding="utf-8") as align_file:
            with open(src_file_name, "r", encoding="utf-8") as src_file:
                with open(tgt_file_name, "r", encoding="utf-8") as tgt_file:
                    for a, s, t in zip_longest(align_file, src_file, tgt_file):
                        si = src_dict.encode_line(s, add_if_not_exist=False)
                        ti = tgt_dict.encode_line(t, add_if_not_exist=False)
                        # Each alignment token is a "srcpos-tgtpos" pair.
                        ai = list(map(lambda x: tuple(x.split("-")), a.split()))
                        for sai, tai in ai:
                            srcidx = si[int(sai)]
                            tgtidx = ti[int(tai)]
                            # Skip pairs involving <unk>; special symbols must
                            # never appear inside a sentence body.
                            if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk():
                                assert srcidx != src_dict.pad()
                                assert srcidx != src_dict.eos()
                                assert tgtidx != tgt_dict.pad()
                                assert tgtidx != tgt_dict.eos()

                                if srcidx not in freq_map:
                                    freq_map[srcidx] = {}
                                if tgtidx not in freq_map[srcidx]:
                                    freq_map[srcidx][tgtidx] = 1
                                else:
                                    freq_map[srcidx][tgtidx] += 1

        # Keep only the most frequent target word per source word.
        align_dict = {}
        for srcidx in freq_map.keys():
            align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get)

        with open(
            os.path.join(
                args.destdir,
                "alignment.{}-{}.txt".format(args.source_lang, args.target_lang),
            ),
            "w",
            encoding="utf-8",
        ) as f:
            for k, v in align_dict.items():
                print("{} {}".format(src_dict[k], tgt_dict[v]), file=f)
336
def binarize(args, filename, vocab, output_prefix, lang, offset, end, append_eos=True):
    """Binarize the ``[offset, end)`` byte range of ``filename`` into one shard.

    Writes a ``.bin``/``.idx`` pair under the destination prefix and returns
    the statistics dict produced by ``Binarizer.binarize``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, lang, "bin"),
        impl=args.dataset_impl,
        vocab_size=len(vocab),
    )
    stats = Binarizer.binarize(
        filename,
        vocab,
        builder.add_item,
        append_eos=append_eos,
        offset=offset,
        end=end,
    )
    builder.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))
    return stats
351
+
352
+
353
def binarize_alignments(args, filename, parse_alignment, output_prefix, offset, end):
    """Binarize the ``[offset, end)`` byte range of an alignment file into one shard.

    Writes a ``.bin``/``.idx`` pair (no vocabulary) and returns the statistics
    dict produced by ``Binarizer.binarize_alignments``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, None, "bin"),
        impl=args.dataset_impl,
        vocab_size=None,
    )
    stats = Binarizer.binarize_alignments(
        filename, parse_alignment, builder.add_item, offset=offset, end=end
    )
    builder.finalize(dataset_dest_file(args, output_prefix, None, "idx"))
    return stats
368
+
369
+
370
def dataset_dest_prefix(args, output_prefix, lang):
    """Return the destination path prefix for a binarized split.

    The prefix is ``<destdir>/<output_prefix>`` optionally followed by a
    ``.src-tgt`` language-pair part and, when ``lang`` is given, the language
    itself (e.g. ``out/train.en-de.en``).
    """
    base = "{}/{}".format(args.destdir, output_prefix)
    if lang is not None:
        suffix = ".{}-{}.{}".format(args.source_lang, args.target_lang, lang)
    elif args.only_source:
        # Monolingual data: no language-pair decoration.
        suffix = ""
    else:
        suffix = ".{}-{}".format(args.source_lang, args.target_lang)
    return base + suffix
380
+
381
+
382
def dataset_dest_file(args, output_prefix, lang, extension):
    """Return the full destination path for a split file, e.g. ``....bin`` or ``....idx``."""
    return "{}.{}".format(dataset_dest_prefix(args, output_prefix, lang), extension)
385
+
386
+
387
def get_offsets(input_file, num_workers):
    """Thin wrapper: byte offsets splitting ``input_file`` into ``num_workers`` chunks."""
    offsets = Binarizer.find_offsets(input_file, num_workers)
    return offsets
389
+
390
+
391
def cli_main():
    """Command-line entry point: parse preprocessing args and run :func:`main`."""
    parser = options.get_preprocessing_parser()
    main(parser.parse_args())


if __name__ == "__main__":
    cli_main()
fairseq-0.10.2/fairseq_cli/score.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Facebook, Inc. and its affiliates.
3
+ #
4
+ # This source code is licensed under the MIT license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ """
7
+ BLEU scoring of generated translations against reference translations.
8
+ """
9
+
10
+ import argparse
11
+ import os
12
+ import sys
13
+
14
+ from fairseq.data import dictionary
15
+ from fairseq.scoring import bleu
16
+
17
+
18
def get_parser():
    """Build the argument parser for the BLEU-scoring command-line tool."""
    p = argparse.ArgumentParser(
        description="Command-line script for BLEU scoring."
    )
    p.add_argument("-s", "--sys", default="-", help="system output")
    p.add_argument("-r", "--ref", required=True, help="references")
    p.add_argument(
        "-o",
        "--order",
        type=int,
        default=4,
        metavar="N",
        help="consider ngrams up to this order",
    )
    p.add_argument(
        "--ignore-case",
        action="store_true",
        help="case-insensitive scoring",
    )
    p.add_argument(
        "--sacrebleu",
        action="store_true",
        help="score with sacrebleu",
    )
    p.add_argument(
        "--sentence-bleu",
        action="store_true",
        help="report sentence-level BLEUs (i.e., with +1 smoothing)",
    )
    return p
35
+
36
+
37
def cli_main():
    """Parse command-line arguments and score system output against references.

    Depending on flags, scores with sacrebleu, per-sentence smoothed BLEU, or
    corpus BLEU; results are printed to stdout. The system output is read from
    stdin when ``--sys -`` (the default).
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == "-" or os.path.exists(
        args.sys
    ), "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref)

    # Renamed from `dict`, which shadowed the builtin of the same name.
    token_dict = dictionary.Dictionary()

    def readlines(fd):
        # Iterate lazily instead of materializing fd.readlines();
        # lower-case when case-insensitive scoring was requested.
        for line in fd:
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))

    elif args.sentence_bleu:

        def score(fdsys):
            # One smoothed BLEU score per line, reset between sentences.
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(token_dict.pad(), token_dict.eos(), token_dict.unk())
                for i, (sys_tok, ref_tok) in enumerate(
                    zip(readlines(fdsys), readlines(fdref))
                ):
                    scorer.reset(one_init=True)
                    sys_tok = token_dict.encode_line(sys_tok)
                    ref_tok = token_dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                    print(i, scorer.result_string(args.order))

    else:

        def score(fdsys):
            # Single corpus-level BLEU score accumulated over all lines.
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(token_dict.pad(), token_dict.eos(), token_dict.unk())
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = token_dict.encode_line(sys_tok)
                    ref_tok = token_dict.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == "-":
        score(sys.stdin)
    else:
        with open(args.sys, "r") as f:
            score(f)


if __name__ == "__main__":
    cli_main()
fairseq-0.10.2/tests/test_backtranslation_dataset.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import tests.utils as test_utils
9
+ import torch
10
+ from fairseq.data import (
11
+ BacktranslationDataset,
12
+ LanguagePairDataset,
13
+ TransformEosDataset,
14
+ )
15
+ from fairseq.sequence_generator import SequenceGenerator
16
+
17
+
18
class TestBacktranslationDataset(unittest.TestCase):
    """Tests for BacktranslationDataset, which wraps a target-side dataset and
    generates synthetic source sentences with a SequenceGenerator."""

    def setUp(self):
        # Shared fixture: a small dictionary, two word ids, sample tokens and
        # a toy model from the common test-utils sequence-generator setup.
        (
            self.tgt_dict,
            self.w1,
            self.w2,
            self.src_tokens,
            self.src_lengths,
            self.model,
        ) = test_utils.sequence_generator_setup()

        dummy_src_samples = self.src_tokens

        self.tgt_dataset = test_utils.TestDataset(data=dummy_src_samples)
        self.cuda = torch.cuda.is_available()

    def _backtranslation_dataset_helper(
        self,
        remove_eos_from_input_src,
        remove_eos_from_output_src,
    ):
        """Build a BacktranslationDataset with the given EOS handling, take one
        batch, and compare generated source/target against expected tensors."""
        tgt_dataset = LanguagePairDataset(
            src=self.tgt_dataset,
            src_sizes=self.tgt_dataset.sizes,
            src_dict=self.tgt_dict,
            tgt=None,
            tgt_sizes=None,
            tgt_dict=None,
        )

        generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.tgt_dict,
            max_len_a=0,
            max_len_b=200,
            beam_size=2,
            unk_penalty=0,
        )

        backtranslation_dataset = BacktranslationDataset(
            tgt_dataset=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # remove eos from the input src
                remove_eos_from_src=remove_eos_from_input_src,
            ),
            src_dict=self.tgt_dict,
            backtranslation_fn=(
                lambda sample: generator.generate([self.model], sample)
            ),
            output_collater=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # if we remove eos from the input src, then we need to add it
                # back to the output tgt
                append_eos_to_tgt=remove_eos_from_input_src,
                remove_eos_from_src=remove_eos_from_output_src,
            ).collater,
            cuda=self.cuda,
        )
        dataloader = torch.utils.data.DataLoader(
            backtranslation_dataset,
            batch_size=2,
            collate_fn=backtranslation_dataset.collater,
        )
        backtranslation_batch_result = next(iter(dataloader))

        eos, pad, w1, w2 = self.tgt_dict.eos(), self.tgt_dict.pad(), self.w1, self.w2

        # Note that we sort by src_lengths and add left padding, so actually
        # ids will look like: [1, 0]
        expected_src = torch.LongTensor([[w1, w2, w1, eos], [pad, pad, w1, eos]])
        if remove_eos_from_output_src:
            expected_src = expected_src[:, :-1]
        expected_tgt = torch.LongTensor([[w1, w2, eos], [w1, w2, eos]])
        generated_src = backtranslation_batch_result["net_input"]["src_tokens"]
        tgt_tokens = backtranslation_batch_result["target"]

        self.assertTensorEqual(expected_src, generated_src)
        self.assertTensorEqual(expected_tgt, tgt_tokens)

    def test_backtranslation_dataset_no_eos_in_output_src(self):
        # EOS kept on input, stripped from the generated source.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=True,
        )

    def test_backtranslation_dataset_with_eos_in_output_src(self):
        # EOS kept on both input and generated source.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=False,
        )

    def test_backtranslation_dataset_no_eos_in_input_src(self):
        # EOS stripped from input; collater re-appends it to the output tgt.
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=True,
            remove_eos_from_output_src=False,
        )

    def assertTensorEqual(self, t1, t2):
        """Assert two tensors have identical shape and elements."""
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)


if __name__ == "__main__":
    unittest.main()
fairseq-0.10.2/tests/test_constraints.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import sys
7
+ import unittest
8
+
9
+ import torch
10
+ from fairseq.token_generation_constraints import *
11
+
12
+
13
def tensorize(constraints: List[List[int]]) -> List[torch.Tensor]:
    """Convert a list of constraint token-id lists into a list of 1-D tensors.

    Fixed the return annotation: the function returns a *list* of tensors
    (one per constraint), not a single ``torch.Tensor``.
    """
    return [torch.tensor(x) for x in constraints]
15
+
16
+
17
class TestHelperRoutines(unittest.TestCase):
    """Tests for ``pack_constraints``, which flattens per-sentence constraint
    lists into a single padded batch tensor."""

    def setUp(self):
        # Pairs of (batch of per-sentence constraint lists, expected packed
        # tensor). In each packed row the first entry is the number of
        # constraints, followed by each constraint's tokens terminated by 0,
        # with trailing zero-padding to the batch-wide maximum row length.
        self.examples = [
            ([[]], torch.tensor([[0]])),
            ([[], []], torch.tensor([[0], [0]])),
            ([[torch.tensor([1, 2])], []], torch.tensor([[1, 1, 2, 0], [0, 0, 0, 0]])),
            (
                [
                    [
                        torch.tensor([3, 1, 2]),
                        torch.tensor([3]),
                        torch.tensor([4, 5, 6, 7]),
                    ],
                    [],
                    [torch.tensor([1, 8, 9, 10, 1, 4, 11, 12])],
                ],
                torch.tensor(
                    [
                        [3, 3, 1, 2, 0, 3, 0, 4, 5, 6, 7, 0],
                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                        [1, 1, 8, 9, 10, 1, 4, 11, 12, 0, 0, 0],
                    ]
                ),
            ),
        ]

    def test_packing(self):
        """Ensures the list of lists of tensors gets packed correctly."""
        for batch_constraints, expected_tensor in self.examples:
            packed = pack_constraints(batch_constraints)
            assert torch.equal(packed, expected_tensor)
48
+
49
+
50
class TestUnorderedConstraintState(unittest.TestCase):
    """Tests for UnorderedConstraintState, where constraints may be satisfied
    in any order via a trie of constraint tokens."""

    def setUp(self):
        # Tuples of (constraint set, expected printed graph, token counts per node)
        self.examples = [
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
                {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
            ),
            ([], "[None].False#0", {}),
            (tensorize([[0]]), "([None].False#1 [0].True#1)", {0: 1}),
            (
                tensorize([[100000, 1, 2, 3, 4, 5]]),
                "([None].False#1 ([100000].False#1 ([1].False#1 ([2].False#1 ([3].False#1 ([4].False#1 [5].True#1))))))",
                {100000: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                "([None].False#2 ([1].False#2 [2].True#2))",
                {1: 2, 2: 2},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                "([None].False#2 ([1].False#1 [2].True#1) ([3].False#1 [4].True#1))",
                {1: 1, 2: 1, 3: 1, 4: 1},
            ),
        ]

        # Tuples of (constraint set, token sequence to consume, expected
        # attribute values of the resulting state).
        self.sequences = [
            (
                self.examples[0][0],
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 94],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4, 999],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [4, 5, 6, 8],
                {"bank": 2, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                # Tricky, because in last three, goes down [1->4] branch, could miss [1] and [4->5]
                # [[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]],
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": True},
            ),
            (
                tensorize([[1], [2, 3]]),
                # Should not be able to get credit for entering 1 a second time
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
            (
                self.examples[5][0],
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
        ]

    def test_graphs(self):
        """
        Test whether unordered graph systems are created correctly.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            c = ConstraintNode.create(constraints)
            assert (
                ConstraintNode.print_graph(c) == expected
            ), f"got {ConstraintNode.print_graph(c)}, expected {expected}"
            assert (
                c.token_counts() == gold_counts
            ), f"{c} got {c.token_counts()} wanted {gold_counts}"

    def test_next_tokens(self):
        """
        Tests that the set of next tokens is correct.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            root = ConstraintNode.create(constraints)

            root_tokens = set(root.children.keys())
            for sequence in constraints:
                state = UnorderedConstraintState(root)
                for token in sequence:
                    # At every step the candidate set is the union of the
                    # root's children and the current node's children.
                    all_tokens = root_tokens.union(state.node.children.keys())
                    assert (
                        all_tokens == state.next_tokens()
                    ), f"ALL {all_tokens} NEXT {state.next_tokens()}"
                    state = state.advance(token)

    def test_sequences(self):
        """Each (constraints, tokens) pair must land in the expected state."""
        for constraints, tokens, expected in self.sequences:
            state = UnorderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)

            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
188
+
189
+
190
class TestOrderedConstraintState(unittest.TestCase):
    """Tests for OrderedConstraintState, where constraints must be satisfied
    strictly in the order given."""

    def setUp(self):
        # The same constraint set is shared by most cases; build it once
        # instead of repeating the identical literal eight times.
        shared = tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]])
        # Tuples of (constraint set, token sequence to consume, expected
        # attribute values of the resulting state).
        self.sequences = [
            (
                shared,
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 94],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 3, 999, 1, 4],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                shared,
                [1, 2, 3, 999, 999],
                {"bank": 3, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 3, 77, 1, 3, 1],
                {"bank": 6, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                shared,
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                shared,
                [1, 2, 999, 1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1], [2, 3]]),
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
        ]

    def test_sequences(self):
        """Each (constraints, tokens) pair must land in the expected state."""
        for i, (constraints, tokens, expected) in enumerate(self.sequences):
            state = OrderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)
            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"


if __name__ == "__main__":
    unittest.main()
fairseq-0.10.2/tests/test_convtbc.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ from fairseq.modules import ConvTBC
11
+
12
+
13
+ class TestConvTBC(unittest.TestCase):
14
+ def test_convtbc(self):
15
+ # ksz, in_channels, out_channels
16
+ conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1)
17
+ # out_channels, in_channels, ksz
18
+ conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1)
19
+
20
+ conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2))
21
+ conv_tbc.bias.data.copy_(conv1d.bias.data)
22
+
23
+ input_tbc = torch.randn(7, 2, 4, requires_grad=True)
24
+ input1d = input_tbc.data.transpose(0, 1).transpose(1, 2)
25
+ input1d.requires_grad = True
26
+
27
+ output_tbc = conv_tbc(input_tbc)
28
+ output1d = conv1d(input1d)
29
+
30
+ self.assertAlmostEqual(
31
+ output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data
32
+ )
33
+
34
+ grad_tbc = torch.randn(output_tbc.size())
35
+ grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous()
36
+
37
+ output_tbc.backward(grad_tbc)
38
+ output1d.backward(grad1d)
39
+
40
+ self.assertAlmostEqual(
41
+ conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data
42
+ )
43
+ self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data)
44
+ self.assertAlmostEqual(
45
+ input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data
46
+ )
47
+
48
+ def assertAlmostEqual(self, t1, t2):
49
+ self.assertEqual(t1.size(), t2.size(), "size mismatch")
50
+ self.assertLess((t1 - t2).abs().max(), 1e-4)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ unittest.main()
fairseq-0.10.2/tests/test_inference_dropout.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import logging
7
+ import unittest
8
+
9
+ from fairseq.models.transformer import TransformerModel
10
+ from tests.test_sequence_generator import get_dummy_task_and_parser
11
+
12
+
13
class TestInferenceDropout(unittest.TestCase):
    """Verifies how TransformerModel.prepare_for_inference_ handles the
    --retain-dropout / --retain-dropout-modules options."""

    def setUp(self):
        self.task, self.parser = get_dummy_task_and_parser()
        TransformerModel.add_args(self.parser)
        self.args = self.parser.parse_args([])
        self.args.encoder_layers = 2
        self.args.decoder_layers = 1
        # silence model-construction logging for the duration of each test
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_sets_inference_dropout_to_true(self):
        self.args.retain_dropout = True
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        encoder = self.transformer_model.encoder
        assert encoder.dropout_module.apply_during_inference
        assert self.transformer_model.decoder.dropout_module.apply_during_inference
        for layer in encoder.layers:
            assert layer.dropout_module.apply_during_inference

    def test_inference_dropout_false_by_default(self):
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        model = self.transformer_model
        assert not model.encoder.dropout_module.apply_during_inference
        assert not model.decoder.dropout_module.apply_during_inference
        for layer in list(model.encoder.layers) + list(model.decoder.layers):
            assert not layer.dropout_module.apply_during_inference

    def test_applies_training_mode(self):
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        assert self.transformer_model.encoder.dropout_module.training
        for layer in self.transformer_model.encoder.layers:
            assert layer.dropout_module.training

        self.transformer_model.eval()
        assert not self.transformer_model.decoder.dropout_module.training
        for layer in self.transformer_model.encoder.layers:
            assert not layer.dropout_module.training

    def test_retain_modules(self):
        # dropout is retained only for the module classes listed by name
        self.args.retain_dropout = True
        self.args.retain_dropout_modules = [
            "TransformerEncoder",
            "TransformerEncoderLayer",
        ]
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        assert self.transformer_model.encoder.dropout_module.apply_during_inference
        assert not self.transformer_model.decoder.dropout_module.apply_during_inference
        for layer in self.transformer_model.decoder.layers:
            assert not layer.dropout_module.apply_during_inference
fairseq-0.10.2/tests/test_metrics.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+ import uuid
8
+
9
+ from fairseq import metrics
10
+
11
+
12
class TestMetrics(unittest.TestCase):
    """Exercises the fairseq.metrics aggregation API: nested contexts,
    new_root contexts, named aggregators, and duplicated names."""

    def test_nesting(self):
        # logging inside the inner context also feeds the outer aggregate
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as inner:
                metrics.log_scalar("loss", 2)

        self.assertEqual(outer.get_smoothed_values()["loss"], 1.5)
        self.assertEqual(inner.get_smoothed_values()["loss"], 2)

    def test_new_root(self):
        # new_root=True detaches the inner context from the outer one
        with metrics.aggregate() as outer:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as detached:
                metrics.log_scalar("loss", 2)

        self.assertEqual(outer.get_smoothed_values()["loss"], 1)
        self.assertEqual(detached.get_smoothed_values()["loss"], 2)

    def test_nested_new_root(self):
        with metrics.aggregate() as layer1:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as layer2:
                metrics.log_scalar("loss", 2)
                with metrics.aggregate() as layer3:
                    metrics.log_scalar("loss", 3)
                    with metrics.aggregate(new_root=True) as layer4:
                        metrics.log_scalar("loss", 4)
            # logged after layer2 closed: reaches layer1 only
            metrics.log_scalar("loss", 1.5)

        self.assertEqual(layer4.get_smoothed_values()["loss"], 4)
        self.assertEqual(layer3.get_smoothed_values()["loss"], 3)
        self.assertEqual(layer2.get_smoothed_values()["loss"], 2.5)
        self.assertEqual(layer1.get_smoothed_values()["loss"], 1.25)

    def test_named(self):
        # a named aggregator accumulates across separate `with` blocks and
        # ignores logging done outside them
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)

        metrics.log_scalar("loss", 3)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 2)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 1.5)

    def test_nested_duplicate_names(self):
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as other:
                with metrics.aggregate(name):
                    metrics.log_scalar("loss", 2)
            metrics.log_scalar("loss", 6)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 3)
        self.assertEqual(other.get_smoothed_values()["loss"], 2)
74
+
75
+
76
+ if __name__ == "__main__":
77
+ unittest.main()
fairseq-0.10.2/tests/test_sequence_scorer.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import argparse
7
+ import unittest
8
+
9
+ import tests.utils as test_utils
10
+ import torch
11
+ from fairseq.sequence_scorer import SequenceScorer
12
+
13
+
14
class TestSequenceScorer(unittest.TestCase):
    """Scores fixed reference targets with a scripted dummy model and checks
    the tokens and (positional and aggregate) scores of the hypotheses."""

    def test_sequence_scorer(self):
        # construct dummy dictionary
        d = test_utils.dummy_dictionary(vocab_size=2)
        self.assertEqual(d.pad(), 1)
        self.assertEqual(d.eos(), 2)
        self.assertEqual(d.unk(), 3)
        eos = d.eos()
        w1 = 4
        w2 = 5

        # construct dataloader
        data = [
            {
                "source": torch.LongTensor([w1, w2, eos]),
                "target": torch.LongTensor([w1, w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, eos]),
            },
        ]
        data_itr = test_utils.dummy_dataloader(data)

        # specify expected output probabilities
        # beam_probs[t][i] is the scripted model distribution at decoding
        # step t for sentence i (columns: eos, unk, w1, w2)
        args = argparse.Namespace()
        unk = 0.0
        args.beam_probs = [
            # step 0:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.0, unk, 0.6, 0.4],  # sentence 1
                    [0.0, unk, 0.4, 0.6],  # sentence 2
                    [0.0, unk, 0.7, 0.3],  # sentence 3
                ]
            ),
            # step 1:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.0, unk, 0.2, 0.7],  # sentence 1
                    [0.0, unk, 0.8, 0.2],  # sentence 2
                    [0.7, unk, 0.1, 0.2],  # sentence 3
                ]
            ),
            # step 2:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.10, unk, 0.50, 0.4],  # sentence 1
                    [0.15, unk, 0.15, 0.7],  # sentence 2
                    [0.00, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
            # step 3:
            torch.FloatTensor(
                [
                    # eos w1 w2
                    [0.9, unk, 0.05, 0.05],  # sentence 1
                    [0.0, unk, 0.00, 0.0],  # sentence 2
                    [0.0, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
        ]
        # per-token probabilities read off beam_probs along each target
        expected_scores = [
            [0.6, 0.7, 0.5, 0.9],  # sentence 1
            [0.6, 0.8, 0.15],  # sentence 2
            [0.3, 0.7],  # sentence 3
        ]

        task = test_utils.TestTranslationTask.setup_task(args, d, d)
        model = task.build_model(args)
        scorer = SequenceScorer(task.target_dictionary)
        for sample in data_itr:
            hypos = task.inference_step(scorer, [model], sample)
            # samples may arrive in any order; map back via the "id" field
            for id, hypos_id in zip(sample["id"].tolist(), hypos):
                self.assertHypoTokens(hypos_id[0], data[id]["target"])
                self.assertHypoScore(hypos_id[0], expected_scores[id])

    def assertHypoTokens(self, hypo, tokens):
        """Assert the hypothesis token sequence equals *tokens*."""
        self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens))

    def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0):
        """Check positional scores against log(*pos_probs*) and the
        (optionally length-normalized) aggregate score."""
        pos_scores = torch.FloatTensor(pos_probs).log()
        self.assertAlmostEqual(hypo["positional_scores"], pos_scores)
        self.assertEqual(pos_scores.numel(), hypo["tokens"].numel())
        score = pos_scores.sum()
        if normalized:
            score /= pos_scores.numel() ** lenpen
        self.assertLess(abs(score - hypo["score"]), 1e-6)

    def assertAlmostEqual(self, t1, t2):
        # elementwise closeness: same size and max abs diff < 1e-4
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertLess((t1 - t2).abs().max(), 1e-4)

    def assertTensorEqual(self, t1, t2):
        # exact equality for integer tensors
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)
117
+
118
+
119
+ if __name__ == "__main__":
120
+ unittest.main()
fairseq-0.10.2/tests/test_token_block_dataset.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import unittest
7
+
8
+ import tests.utils as test_utils
9
+ import torch
10
+ from fairseq.data import TokenBlockDataset
11
+
12
+
13
class TestTokenBlockDataset(unittest.TestCase):
    """Checks TokenBlockDataset blocking for the "eos", "none" and
    "complete" break modes."""

    def _build_dataset(self, data, **kwargs):
        """Wrap *data* in a TestDataset and index it with TokenBlockDataset."""
        return TokenBlockDataset(
            test_utils.TestDataset(data), [len(x) for x in data], **kwargs
        )

    def _assert_blocks(self, ds, expected):
        # compare every produced block against the expected token lists
        for idx, tokens in enumerate(expected):
            self.assertEqual(ds[idx].tolist(), tokens)

    def test_eos_break_mode(self):
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [1], [8, 7, 6, 1]])

        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [8, 7, 6, 1], [1]])

    def test_block_break_mode(self):
        # "none": sentences are concatenated and re-split at block_size
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=3, pad=0, eos=1, break_mode="none")
        self._assert_blocks(ds, [[5, 4, 3], [2, 1, 8], [7, 6, 1], [9, 1]])

    def test_complete_break_mode(self):
        # "complete": whole sentences are packed up to block_size
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=6, pad=0, eos=1, break_mode="complete"
        )
        self._assert_blocks(ds, [[5, 4, 3, 2, 1], [8, 7, 6, 1, 9, 1]])

        data = [
            torch.tensor([4, 3, 2, 1], dtype=torch.long),
            torch.tensor([5, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=3, pad=0, eos=1, break_mode="complete"
        )
        self._assert_blocks(ds, [[4, 3, 2, 1], [5, 1, 1], [6, 1]])
77
+
78
+ if __name__ == "__main__":
79
+ unittest.main()
mosesdecoder/.beautify-ignore ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Files and directories that beautify.py should not clean up.
2
+ #
3
+ # This file is not as advanced as, say, .gitignore. It only supports files
4
+ # and directory paths relative to the project root, one per line, no globs,
5
+ # no quotes.
6
+ #
7
+ # Leading and trailing whitespace is stripped from filenames, but internal
8
+ # whitespace is preserved.
9
+ #
10
+ # Lines starting with a hash mark, such as this one, are comments. The hash
11
+ # mark must be the first character on the line. Blank lines are ignored.
12
+ #
13
+ # The .beautify-ignore file must be encoded in UTF-8.
14
+
15
+ boost
16
+ contrib
17
+ irstlm
18
+ jam-files
19
+ lm
20
+ mingw/MosesGUI/icons_rc.py
21
+ mingw/MosesGUI/Ui_credits.py
22
+ mingw/MosesGUI/Ui_mainWindow.py
23
+ moses/TranslationModel/UG
24
+ moses/server
25
+ moses/parameters
26
+ moses/thread_safe_container.h
27
+ phrase-extract/pcfg-common
28
+ phrase-extract/syntax-common
29
+ randlm
30
+ # Filename suffixes in here are language codes, so e.g. ".pl" means
31
+ # Polish, not Perl.
32
+ scripts/share/nonbreaking_prefixes
33
+ search
34
+ srilm
35
+ util
36
+ xmlrpc-c
37
+ .git
38
+ util/ug_cache_with_timeout.h
mosesdecoder/.travis.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sudo: false
2
+ dist: trusty
3
+ language: c
4
+ compiler: gcc
5
+ env:
6
+ matrix:
7
+ addons:
8
+ apt:
9
+ sources:
10
+ - ubuntu-toolchain-r-test
11
+ packages:
12
+ - subversion
13
+ - automake
14
+ - libtool
15
+ - zlib1g-dev
16
+ - libbz2-dev
17
+ - liblzma-dev
18
+ - libboost-all-dev
19
+ - libgoogle-perftools-dev
20
+ - libxmlrpc-c++.*-dev
21
+ - cmake
22
+ - csh
23
+ script:
24
+ - ./bjam -j4
mosesdecoder/azure-pipelines.yml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Starter pipeline
2
+ # Start with a minimal pipeline that you can customize to build and deploy your code.
3
+ # Add steps that build, run tests, deploy, and more:
4
+ # https://aka.ms/yaml
5
+
6
+ trigger:
7
+ - master
8
+
9
+ pool:
10
+ #vmImage: 'ubuntu-latest'
11
+ vmImage: 'ubuntu-16.04'
12
+
13
+ steps:
14
+
15
+ - script: |
16
+ echo Printing some environment information
17
+ echo HOME: $HOME
18
+ echo
19
+ echo UBUNTU VERSION:
20
+ cat /etc/lsb-release
21
+ echo
22
+ echo CPU INFO
23
+ cat /proc/cpuinfo
24
+ echo
25
+ echo MEM INFO
26
+ cat /proc/meminfo
27
+ echo
28
+ echo DISK INFO
29
+ df -h
30
+ echo
31
+ echo PWD: $PWD
32
+ echo
33
+ ls
34
+ displayName: 'Printing some environment information'
35
+
36
+
37
+ ## Installation commands for Ubuntu
38
+ - script: |
39
+ sudo apt-get install \
40
+ g++ \
41
+ git \
42
+ subversion \
43
+ automake \
44
+ libtool \
45
+ zlib1g-dev \
46
+ libicu-dev \
47
+ libboost-all-dev \
48
+ libssl-dev \
49
+ libbz2-dev \
50
+ liblzma-dev \
51
+ python-dev \
52
+ graphviz \
53
+ imagemagick \
54
+ make \
55
+ cmake \
56
+ libgoogle-perftools-dev \
57
+ autoconf \
58
+ doxygen
59
+ displayName: 'Install Ubuntu packages'
60
+
61
+ - script: |
62
+ wget "https://sourceforge.net/projects/cmph/files/v2.0.2/cmph-2.0.2.tar.gz/download"
63
+ mv download cmph-2.0.2.tar.gz
64
+ tar xvzf cmph-2.0.2.tar.gz
65
+ cd cmph-2.0.2
66
+ ./configure --prefix=$PWD
67
+ make
68
+ make install
69
+ cd ..
70
+ displayName: 'Build and Install cmph'
71
+
72
+ - script: |
73
+ wget "https://sourceforge.net/projects/xmlrpc-c/files/Xmlrpc-c%20Super%20Stable/1.51.06/xmlrpc-c-1.51.06.tgz/download"
74
+ mv download xmlrpc-c-1.51.06.tgz
75
+ tar xvzf xmlrpc-c-1.51.06.tgz
76
+ cd xmlrpc-c-1.51.06
77
+ ./configure --prefix=$PWD
78
+ make
79
+ make install
80
+ sudo ldconfig
81
+ cd ..
82
+ displayName: 'Build and Install xmlrpc-c'
83
+
84
+ - script: |
85
+ ./bjam \
86
+ --with-cmph=$PWD/cmph-2.0.2 \
87
+ --with-xmlrpc-c=$PWD/xmlrpc-c-1.51.06 \
88
+ -j2
89
+ displayName: 'Build Moses'
90
+
91
+ # - script: |
92
+ # ./bjam \
93
+ # -j2
94
+ # displayName: 'Build Moses'
95
+
96
+ # - task: ComponentGovernanceComponentDetection@0
97
+ # inputs:
98
+ # scanType: 'Register'
99
+ # verbosity: 'Verbose'
100
+ # alertWarningLevel: 'High'
mosesdecoder/biconcor/Jamfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
2
+ exe phrase-lookup : Vocabulary.cpp SuffixArray.cpp phrase-lookup.cpp ;
mosesdecoder/biconcor/PhrasePairCollection.cpp ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "PhrasePairCollection.h"
2
+
3
+ #include <cstdlib>
4
+ #include <cstring>
5
+ #include <algorithm>
6
+
7
+ #include "Vocabulary.h"
8
+ #include "SuffixArray.h"
9
+ #include "TargetCorpus.h"
10
+ #include "Alignment.h"
11
+ #include "PhrasePair.h"
12
+ #include "Mismatch.h"
13
+
14
+ using namespace std;
15
+
16
// Collects phrase-pair occurrences for a query phrase from a word-aligned
// parallel corpus (suffix array over the source side plus target corpus).
// The pointer arguments are stored, not copied, and are not deleted here.
PhrasePairCollection::PhrasePairCollection( SuffixArray *sa, TargetCorpus *tc, Alignment *a, int max_translation, int max_example )
  :m_suffixArray(sa)
  ,m_targetCorpus(tc)
  ,m_alignment(a)
  ,m_size(0)
  ,m_max_lookup(10000) // maximum number of source occurrences sampled
  ,m_max_translation(max_translation) // max number of different distinct translations returned
  ,m_max_example(max_example) // max number of examples returned for each distinct translation
{}

// NOTE(review): the destructor does not free the heap-allocated PhrasePair
// and Mismatch objects stored in m_collection / m_mismatch / m_unaligned —
// confirm whether ownership lies elsewhere or this is a tolerated leak.
PhrasePairCollection::~PhrasePairCollection()
{}
28
+
29
// Finds all occurrences of sourceString in the suffix array, extracts the
// aligned target phrase for each occurrence, and groups the resulting
// phrase pairs by identical target string into m_collection (sorted by
// group size, largest first). Occurrences without a consistent alignment
// are recorded in m_unaligned / m_mismatch instead.
// Returns the number of occurrences actually processed; when there are
// more than m_max_lookup matches, occurrences are sampled evenly.
int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
{
  INDEX first_match, last_match;
  if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {
    return 0; // query phrase does not occur in the corpus
  }
  //cerr << "\tfirst match " << first_match << endl;
  //cerr << "\tlast match " << last_match << endl;

  INDEX found = last_match - first_match +1;

  // maps a target word-id sequence to its group index in m_collection
  map< vector< WORD_ID >, INDEX > index;
  int real_count = 0;
  for( INDEX i=first_match; i<=last_match; i++ ) {
    int position = m_suffixArray->GetPosition( i );
    int source_start = m_suffixArray->GetWordInSentence( position );
    int source_end = source_start + sourceString.size()-1;
    INDEX sentence_id = m_suffixArray->GetSentence( position );
    int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
    int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
    //cerr << "match " << (i-first_match)
    //<< " in sentence " << sentence_id
    //<< ", starting at word " << source_start
    //<< " of " << sentence_length
    //<< ". target sentence has " << target_length << " words.";
    int target_start, target_end, pre_null, post_null;
    if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
      //cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
      //cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
      // Expansion of the target phrase by unaligned boundary words is
      // disabled: with this flag false only pre==0 / post==0 iterate.
      bool null_boundary_words = false;
      for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
        for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
          vector< WORD_ID > targetString;
          //cerr << "; ";
          for (int target = target_start - pre; target <= target_end + post; target++) {
            targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
            //cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";
          }
          // heap-allocated; stored raw in m_collection (see dtor note)
          PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);
          // matchCollection.Add( sentence_id, )
          if (index.find( targetString ) == index.end()) {
            // first time we see this target string: open a new group
            index[targetString] = m_collection.size();
            vector< PhrasePair* > emptyVector;
            m_collection.push_back( emptyVector );
          }
          m_collection[ index[targetString] ].push_back( phrasePair );
          m_size++;
        }
      }
    } else {
      //cerr << "mismatch " << (i-first_match)
      // << " in sentence " << sentence_id
      // << ", starting at word " << source_start
      // << " of " << sentence_length
      // << ". target sentence has " << target_length << " words.";
      Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
      if (mismatch->Unaligned())
        m_unaligned.push_back( mismatch );
      else
        m_mismatch.push_back( mismatch );
    }
    //cerr << endl;

    // too many matches: skip ahead so roughly m_max_lookup are sampled
    if (found > (INDEX)m_max_lookup) {
      i += found/m_max_lookup-1;
    }
    real_count++;
  }
  // largest translation groups first (CompareBySize: descending size)
  sort(m_collection.begin(), m_collection.end(), CompareBySize());
  return real_count;
}
100
+
101
// Prints up to m_max_translation distinct translations to stdout; for each,
// up to m_max_example example sentence pairs (sampled evenly over the group
// when it has more). `pretty` selects PrintPretty over plain Print.
void PhrasePairCollection::Print(bool pretty) const
{
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  int i=0;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {
    // target string header followed by its occurrence count
    (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
    int count = ppWithSameTarget->size();
    cout << "(" << count << ")" << endl;
    vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();
    // NOTE(review): `int j` is compared against the unsigned size() here —
    // harmless for realistic sizes but triggers sign-compare warnings.
    for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
      if (pretty) {
        (*p)->PrintPretty( &cout, 100 );
      } else {
        (*p)->Print( &cout );
      }
      // stride over the group so the examples are spread evenly
      if (ppWithSameTarget->size() > m_max_example) {
        p += ppWithSameTarget->size()/m_max_example-1;
      }
    }
  }
}
122
+
123
// Emits the collection as HTML to stdout: one table per distinct translation
// (first 10 rows shown, the rest behind a "(more)" toggle), a combined
// header once the groups degrade to singletons, and final tables for
// unaligned and mismatched occurrences.
void PhrasePairCollection::PrintHTML() const
{
  int pp_target = 0;
  bool singleton = false;   // set once we reach the first size-1 group
  // loop over all translations
  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {

    int count = ppWithSameTarget->size();
    if (!singleton) {
      if (count == 1) {
        // m_collection is sorted by size, so every remaining group is a
        // singleton too; print one shared header for all of them
        singleton = true;
        cout << "<p class=\"pp_singleton_header\">singleton"
             << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
             << (m_collection.end() - ppWithSameTarget)
             << "/" << m_size << ")</p>";
      } else {
        cout << "<p class=\"pp_target_header\">";
        (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
        cout << " (" << count << "/" << m_size << ")" << endl;
        cout << "<p><div id=\"pp_" << pp_target << "\">";
      }
      cout << "<table align=\"center\">";
    }

    vector< PhrasePair* >::const_iterator p;
    // loop over all sentences where translation occurs
    int pp=0;
    int i=0;
    // first 10 rows, sampled evenly when the group exceeds m_max_example
    for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
      (*p)->PrintClippedHTML( &cout, 160 );
      if (count > m_max_example) {
        p += count/m_max_example-1;
        pp += count/m_max_example-1;
      }
    }
    if (i == 10 && pp < count) {
      // extended table
      cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
      cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
      cout << "<table align=\"center\">";
      // re-emit the group from the start, up to m_max_example rows
      for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
        (*p)->PrintClippedHTML( &cout, 160 );
        if (count > m_max_example) {
          p += count/m_max_example-1;
          pp += count/m_max_example-1;
        }
      }
    }
    if (!singleton) cout << "</table></div>\n";

    // after 10 non-singleton groups, hide the remainder behind a toggle
    if (!singleton && pp_target == 9) {
      cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
      cout << "<p class=\"pp_target_header\">(more)</p></div>";
      cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
    }
  }
  if (singleton) cout << "</table></div>\n";
  else if (pp_target > 9) cout << "</div>";

  size_t max_mismatch = m_max_example/3;
  // unaligned phrases
  if (m_unaligned.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">unaligned"
         << " (" << (m_unaligned.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    // sample at most max_mismatch rows, evenly spaced
    if (m_unaligned.size() > max_mismatch)
      step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_unaligned.size(); i+=step_size)
      m_unaligned[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }

  // mismatched phrases
  if (m_mismatch.size() > 0) {
    cout << "<p class=\"pp_singleton_header\">mismatched"
         << " (" << (m_mismatch.size()) << ")</p>";
    cout << "<table align=\"center\">";
    int step_size = 1;
    if (m_mismatch.size() > max_mismatch)
      step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
    for(size_t i=0; i<m_mismatch.size(); i+=step_size)
      m_mismatch[i]->PrintClippedHTML( &cout, 160 );
    cout << "</table>";
  }
}
mosesdecoder/biconcor/PhrasePairCollection.h ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include <vector>
4
+ #include <string>
5
+
6
+ class Alignment;
7
+ class PhrasePair;
8
+ class SuffixArray;
9
+ class TargetCorpus;
10
+ class Mismatch;
11
+
12
// Collects and prints phrase-pair occurrences of a query phrase, using a
// suffix array over the source corpus, the target corpus, and the word
// alignment between them (see PhrasePairCollection.cpp).
class PhrasePairCollection
{
public:
  typedef unsigned int INDEX; // index type, shared with SuffixArray

private:
  SuffixArray *m_suffixArray;    // source-side suffix array (not deleted here)
  TargetCorpus *m_targetCorpus;  // target side of the parallel corpus (not deleted here)
  Alignment *m_alignment;        // word alignment (not deleted here)
  // phrase pairs grouped by identical target string
  std::vector<std::vector<PhrasePair*> > m_collection;
  // occurrences whose alignment is inconsistent / absent
  std::vector< Mismatch* > m_mismatch, m_unaligned;
  int m_size;             // total number of collected phrase pairs
  int m_max_lookup;       // cap on source occurrences examined
  int m_max_translation;  // cap on distinct translations printed
  int m_max_example;      // cap on examples printed per translation

  // No copying allowed.
  PhrasePairCollection(const PhrasePairCollection&);
  void operator=(const PhrasePairCollection&);

public:
  PhrasePairCollection ( SuffixArray *, TargetCorpus *, Alignment *, int, int );
  ~PhrasePairCollection ();

  // Gathers phrase pairs for the query; returns occurrences processed.
  int GetCollection( const std::vector<std::string >& sourceString );
  // Plain-text output; `pretty` selects the human-readable layout.
  void Print(bool pretty) const;
  // HTML output with collapsible per-translation tables.
  void PrintHTML() const;
};
40
+
41
// sorting helper: orders translation groups by descending size, so the
// most frequent translations come first after std::sort
struct CompareBySize {
  bool operator()(const std::vector<PhrasePair*>& a, const std::vector<PhrasePair*>& b ) const {
    return a.size() > b.size();
  }
};
mosesdecoder/biconcor/SuffixArray.h ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+
3
+ #include "Vocabulary.h"
4
+
5
// Suffix array over a tokenized corpus: supports phrase search and maps
// corpus positions back to their sentence (and, optionally, document).
class SuffixArray
{
public:
  typedef unsigned int INDEX;

private:
  WORD_ID *m_array;           // the corpus as a flat sequence of word ids
  INDEX *m_index;             // suffix array: positions in sorted suffix order
  INDEX *m_buffer;            // presumably scratch space for Sort — confirm in .cpp
  char *m_wordInSentence;     // position of each corpus word within its sentence
  INDEX *m_sentence;          // sentence id for each corpus position
  char *m_sentenceLength;     // per-sentence length (char — assumes <= 255 words; TODO confirm)
  WORD_ID m_endOfSentence;    // word id used as the sentence separator
  INDEX *m_document;          // document bookkeeping (only with UseDocument())
  INDEX *m_documentName;      // offsets of document names in m_documentNameBuffer
  char *m_documentNameBuffer; // 0-terminated document names, concatenated
  size_t m_documentNameLength;
  size_t m_documentCount;
  bool m_useDocument;         // whether document boundaries are tracked
  Vocabulary m_vcb;           // word <-> id mapping
  INDEX m_size;               // number of tokens in the corpus
  INDEX m_sentenceCount;

  // No copying allowed.
  SuffixArray(const SuffixArray&);
  void operator=(const SuffixArray&);

public:
  SuffixArray();
  ~SuffixArray();

  // Builds the array from a tokenized corpus file.
  void Create(const std::string& fileName );
  // Processes one document-boundary line while reading the corpus.
  bool ProcessDocumentLine( const char* const, const size_t );
  void Sort(INDEX start, INDEX end);
  int CompareIndex( INDEX a, INDEX b ) const;
  inline int CompareWord( WORD_ID a, WORD_ID b ) const;
  // Number of occurrences of the phrase in the corpus.
  int Count( const std::vector< WORD > &phrase );
  bool MinCount( const std::vector< WORD > &phrase, INDEX min );
  bool Exists( const std::vector< WORD > &phrase );
  // Finds the suffix-array range [firstMatch, lastMatch] matching the phrase.
  // NOTE: search_end defaults to -1, which wraps to the maximum INDEX value
  // (INDEX is unsigned) — an "until the end" sentinel.
  int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
  // NOTE(review): the defaults (search_start = -1, search_end = 0) look
  // swapped relative to FindMatches — confirm against the implementation.
  int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
  INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
  INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
  int Match( const std::vector< WORD > &phrase, INDEX index );
  void List( INDEX start, INDEX end );
  void PrintSentenceMatches( const std::vector< WORD > &phrase );
  // Corpus position of the index-th suffix in sorted order.
  inline INDEX GetPosition( INDEX index ) const {
    return m_index[ index ];
  }
  inline INDEX GetSentence( INDEX position ) const {
    return m_sentence[position];
  }
  inline char GetWordInSentence( INDEX position ) const {
    return m_wordInSentence[position];
  }
  inline char GetSentenceLength( INDEX sentenceId ) const {
    return m_sentenceLength[sentenceId];
  }
  inline INDEX GetSize() const {
    return m_size;
  }
  inline WORD GetWord( INDEX position ) const {
    return m_vcb.GetWord( m_array[position] );
  }
  // Enables document-boundary tracking (must be set before Create).
  void UseDocument() {
    m_useDocument = true;
  }
  INDEX GetDocument( INDEX sentence ) const;
  // Writes the 0-terminated name of the given document to stdout.
  void PrintDocumentName( INDEX document ) {
    for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
      std::cout << m_documentNameBuffer[ i ];
    }
  }
  void Save(const std::string& fileName ) const;
  void Load(const std::string& fileName );
  void CheckAllocation(bool, const char *dataStructure) const;
  bool Error( const char* message, const std::string& fileName) const;
};
mosesdecoder/biconcor/biconcor.cpp ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "SuffixArray.h"
2
+ #include "TargetCorpus.h"
3
+ #include "Alignment.h"
4
+ #include "PhrasePairCollection.h"
5
+ #include <getopt.h>
6
+ #include "base64.h"
7
+
8
+ using namespace std;
9
+
10
+ int main(int argc, char* argv[])
11
+ {
12
+ // handle parameters
13
+ string query;
14
+ string fileNameSuffix;
15
+ string fileNameSource;
16
+ string fileNameTarget = "";
17
+ string fileNameAlignment = "";
18
+ int loadFlag = false;
19
+ int saveFlag = false;
20
+ int createFlag = false;
21
+ int queryFlag = false;
22
+ int htmlFlag = false; // output as HTML
23
+ int prettyFlag = false; // output readable on screen
24
+ int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
25
+ int max_translation = 20;
26
+ int max_example = 50;
27
+ string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create source-corpus]\n\t[--query string]\n\t[--target target-corpus]\n\t[--alignment file]\n\t[--translations count]\n\t[--examples count]\n\t[--html]\n\t[--stdio]\n";
28
+ while(1) {
29
+ static struct option long_options[] = {
30
+ {"load", required_argument, 0, 'l'},
31
+ {"save", required_argument, 0, 's'},
32
+ {"create", required_argument, 0, 'c'},
33
+ {"query", required_argument, 0, 'q'},
34
+ {"target", required_argument, 0, 't'},
35
+ {"alignment", required_argument, 0, 'a'},
36
+ {"html", no_argument, 0, 'h'},
37
+ {"pretty", no_argument, 0, 'p'},
38
+ {"stdio", no_argument, 0, 'i'},
39
+ {"translations", required_argument, 0, 'o'},
40
+ {"examples", required_argument, 0, 'e'},
41
+ {0, 0, 0, 0}
42
+ };
43
+ int option_index = 0;
44
+ int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:hpio:e:", long_options, &option_index);
45
+ if (c == -1) break;
46
+ switch (c) {
47
+ case 'l':
48
+ fileNameSuffix = string(optarg);
49
+ loadFlag = true;
50
+ break;
51
+ case 't':
52
+ fileNameTarget = string(optarg);
53
+ break;
54
+ case 'a':
55
+ fileNameAlignment = string(optarg);
56
+ break;
57
+ case 's':
58
+ fileNameSuffix = string(optarg);
59
+ saveFlag = true;
60
+ break;
61
+ case 'c':
62
+ fileNameSource = string(optarg);
63
+ createFlag = true;
64
+ break;
65
+ case 'Q':
66
+ query = base64_decode(string(optarg));
67
+ queryFlag = true;
68
+ break;
69
+ case 'q':
70
+ query = string(optarg);
71
+ queryFlag = true;
72
+ break;
73
+ case 'o':
74
+ max_translation = atoi(optarg);
75
+ break;
76
+ case 'e':
77
+ max_example = atoi(optarg);
78
+ break;
79
+ case 'p':
80
+ prettyFlag = true;
81
+ break;
82
+ case 'h':
83
+ htmlFlag = true;
84
+ break;
85
+ case 'i':
86
+ stdioFlag = true;
87
+ break;
88
+ default:
89
+ cerr << info;
90
+ exit(1);
91
+ }
92
+ }
93
+ if (stdioFlag) {
94
+ queryFlag = true;
95
+ }
96
+
97
+ // check if parameter settings are legal
98
+ if (saveFlag && !createFlag) {
99
+ cerr << "error: cannot save without creating\n" << info;
100
+ exit(1);
101
+ }
102
+ if (saveFlag && loadFlag) {
103
+ cerr << "error: cannot load and save at the same time\n" << info;
104
+ exit(1);
105
+ }
106
+ if (!loadFlag && !createFlag) {
107
+ cerr << "error: neither load or create - i have no info!\n" << info;
108
+ exit(1);
109
+ }
110
+ if (createFlag && (fileNameTarget == "" || fileNameAlignment == "")) {
111
+ cerr << "error: i have no target corpus or alignment\n" << info;
112
+ exit(1);
113
+ }
114
+
115
+ // do your thing
116
+ SuffixArray suffixArray;
117
+ TargetCorpus targetCorpus;
118
+ Alignment alignment;
119
+ if (createFlag) {
120
+ cerr << "will create\n";
121
+ cerr << "source corpus is in " << fileNameSource << endl;
122
+ suffixArray.Create( fileNameSource );
123
+ cerr << "target corpus is in " << fileNameTarget << endl;
124
+ targetCorpus.Create( fileNameTarget );
125
+ cerr << "alignment is in " << fileNameAlignment << endl;
126
+ alignment.Create( fileNameAlignment );
127
+ if (saveFlag) {
128
+ suffixArray.Save( fileNameSuffix );
129
+ targetCorpus.Save( fileNameSuffix );
130
+ alignment.Save( fileNameSuffix );
131
+ cerr << "will save in " << fileNameSuffix << endl;
132
+ }
133
+ }
134
+ if (loadFlag) {
135
+ cerr << "will load from " << fileNameSuffix << endl;
136
+ suffixArray.Load( fileNameSuffix );
137
+ targetCorpus.Load( fileNameSuffix );
138
+ alignment.Load( fileNameSuffix );
139
+ }
140
+ if (stdioFlag) {
141
+ cout << "-|||- BICONCOR START -|||-" << endl << flush;
142
+ while(true) {
143
+ string query;
144
+ if (getline(cin, query, '\n').eof()) {
145
+ return 0;
146
+ }
147
+ vector< string > queryString = alignment.Tokenize( query.c_str() );
148
+ PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
149
+ int total = ppCollection.GetCollection( queryString );
150
+ cout << "TOTAL: " << total << endl;
151
+ if (htmlFlag) {
152
+ ppCollection.PrintHTML();
153
+ } else {
154
+ ppCollection.Print(prettyFlag);
155
+ }
156
+ cout << "-|||- BICONCOR END -|||-" << endl << flush;
157
+ }
158
+ } else if (queryFlag) {
159
+ cerr << "query is " << query << endl;
160
+ vector< string > queryString = alignment.Tokenize( query.c_str() );
161
+ PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
162
+ ppCollection.GetCollection( queryString );
163
+ if (htmlFlag) {
164
+ ppCollection.PrintHTML();
165
+ } else {
166
+ ppCollection.Print(prettyFlag);
167
+ }
168
+ }
169
+
170
+ return 0;
171
+ }
mosesdecoder/chk.tmp ADDED
@@ -0,0 +1 @@
 
 
1
+ test
mosesdecoder/doxygen.conf ADDED
@@ -0,0 +1,1781 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Doxyfile 1.7.6.1
2
+
3
+ # This file describes the settings to be used by the documentation system
4
+ # doxygen (www.doxygen.org) for a project.
5
+ #
6
+ # All text after a hash (#) is considered a comment and will be ignored.
7
+ # The format is:
8
+ # TAG = value [value, ...]
9
+ # For lists items can also be appended using:
10
+ # TAG += value [value, ...]
11
+ # Values that contain spaces should be placed between quotes (" ").
12
+
13
+ #---------------------------------------------------------------------------
14
+ # Project related configuration options
15
+ #---------------------------------------------------------------------------
16
+
17
+ # This tag specifies the encoding used for all characters in the config file
18
+ # that follow. The default is UTF-8 which is also the encoding used for all
19
+ # text before the first occurrence of this tag. Doxygen uses libiconv (or the
20
+ # iconv built into libc) for the transcoding. See
21
+ # http://www.gnu.org/software/libiconv for the list of possible encodings.
22
+
23
+ DOXYFILE_ENCODING = UTF-8
24
+
25
+ # The PROJECT_NAME tag is a single word (or sequence of words) that should
26
+ # identify the project. Note that if you do not use Doxywizard you need
27
+ # to put quotes around the project name if it contains spaces.
28
+
29
+ PROJECT_NAME = "Moses Decoder"
30
+
31
+ # The PROJECT_NUMBER tag can be used to enter a project or revision number.
32
+ # This could be handy for archiving the generated documentation or
33
+ # if some version control system is used.
34
+
35
+ PROJECT_NUMBER =
36
+
37
+ # Using the PROJECT_BRIEF tag one can provide an optional one line description
38
+ # for a project that appears at the top of each page and should give viewer
39
+ # a quick idea about the purpose of the project. Keep the description short.
40
+
41
+ PROJECT_BRIEF =
42
+
43
+ # With the PROJECT_LOGO tag one can specify a logo or icon that is
44
+ # included in the documentation. The maximum height of the logo should not
45
+ # exceed 55 pixels and the maximum width should not exceed 200 pixels.
46
+ # Doxygen will copy the logo to the output directory.
47
+
48
+ PROJECT_LOGO =
49
+
50
+ # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
51
+ # base path where the generated documentation will be put.
52
+ # If a relative path is entered, it will be relative to the location
53
+ # where doxygen was started. If left blank the current directory will be used.
54
+
55
+ OUTPUT_DIRECTORY = doxy
56
+
57
+ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
58
+ # 4096 sub-directories (in 2 levels) under the output directory of each output
59
+ # format and will distribute the generated files over these directories.
60
+ # Enabling this option can be useful when feeding doxygen a huge amount of
61
+ # source files, where putting all generated files in the same directory would
62
+ # otherwise cause performance problems for the file system.
63
+
64
+ CREATE_SUBDIRS = NO
65
+
66
+ # The OUTPUT_LANGUAGE tag is used to specify the language in which all
67
+ # documentation generated by doxygen is written. Doxygen will use this
68
+ # information to generate all constant output in the proper language.
69
+ # The default language is English, other supported languages are:
70
+ # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
71
+ # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
72
+ # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
73
+ # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
74
+ # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
75
+ # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
76
+
77
+ OUTPUT_LANGUAGE = English
78
+
79
+ # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
80
+ # include brief member descriptions after the members that are listed in
81
+ # the file and class documentation (similar to JavaDoc).
82
+ # Set to NO to disable this.
83
+
84
+ BRIEF_MEMBER_DESC = YES
85
+
86
+ # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
87
+ # the brief description of a member or function before the detailed description.
88
+ # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
89
+ # brief descriptions will be completely suppressed.
90
+
91
+ REPEAT_BRIEF = YES
92
+
93
+ # This tag implements a quasi-intelligent brief description abbreviator
94
+ # that is used to form the text in various listings. Each string
95
+ # in this list, if found as the leading text of the brief description, will be
96
+ # stripped from the text and the result after processing the whole list, is
97
+ # used as the annotated text. Otherwise, the brief description is used as-is.
98
+ # If left blank, the following values are used ("$name" is automatically
99
+ # replaced with the name of the entity): "The $name class" "The $name widget"
100
+ # "The $name file" "is" "provides" "specifies" "contains"
101
+ # "represents" "a" "an" "the"
102
+
103
+ ABBREVIATE_BRIEF =
104
+
105
+ # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
106
+ # Doxygen will generate a detailed section even if there is only a brief
107
+ # description.
108
+
109
+ ALWAYS_DETAILED_SEC = NO
110
+
111
+ # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
112
+ # inherited members of a class in the documentation of that class as if those
113
+ # members were ordinary class members. Constructors, destructors and assignment
114
+ # operators of the base classes will not be shown.
115
+
116
+ INLINE_INHERITED_MEMB = NO
117
+
118
+ # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
119
+ # path before files name in the file list and in the header files. If set
120
+ # to NO the shortest path that makes the file name unique will be used.
121
+
122
+ FULL_PATH_NAMES = YES
123
+
124
+ # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
125
+ # can be used to strip a user-defined part of the path. Stripping is
126
+ # only done if one of the specified strings matches the left-hand part of
127
+ # the path. The tag can be used to show relative paths in the file list.
128
+ # If left blank the directory from which doxygen is run is used as the
129
+ # path to strip.
130
+
131
+ STRIP_FROM_PATH =
132
+
133
+ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
134
+ # the path mentioned in the documentation of a class, which tells
135
+ # the reader which header file to include in order to use a class.
136
+ # If left blank only the name of the header file containing the class
137
+ # definition is used. Otherwise one should specify the include paths that
138
+ # are normally passed to the compiler using the -I flag.
139
+
140
+ STRIP_FROM_INC_PATH =
141
+
142
+ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
143
+ # (but less readable) file names. This can be useful if your file system
144
+ # doesn't support long names like on DOS, Mac, or CD-ROM.
145
+
146
+ SHORT_NAMES = NO
147
+
148
+ # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
149
+ # will interpret the first line (until the first dot) of a JavaDoc-style
150
+ # comment as the brief description. If set to NO, the JavaDoc
151
+ # comments will behave just like regular Qt-style comments
152
+ # (thus requiring an explicit @brief command for a brief description.)
153
+
154
+ JAVADOC_AUTOBRIEF = NO
155
+
156
+ # If the QT_AUTOBRIEF tag is set to YES then Doxygen will
157
+ # interpret the first line (until the first dot) of a Qt-style
158
+ # comment as the brief description. If set to NO, the comments
159
+ # will behave just like regular Qt-style comments (thus requiring
160
+ # an explicit \brief command for a brief description.)
161
+
162
+ QT_AUTOBRIEF = NO
163
+
164
+ # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
165
+ # treat a multi-line C++ special comment block (i.e. a block of //! or ///
166
+ # comments) as a brief description. This used to be the default behaviour.
167
+ # The new default is to treat a multi-line C++ comment block as a detailed
168
+ # description. Set this tag to YES if you prefer the old behaviour instead.
169
+
170
+ MULTILINE_CPP_IS_BRIEF = NO
171
+
172
+ # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
173
+ # member inherits the documentation from any documented member that it
174
+ # re-implements.
175
+
176
+ INHERIT_DOCS = YES
177
+
178
+ # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
179
+ # a new page for each member. If set to NO, the documentation of a member will
180
+ # be part of the file/class/namespace that contains it.
181
+
182
+ SEPARATE_MEMBER_PAGES = NO
183
+
184
+ # The TAB_SIZE tag can be used to set the number of spaces in a tab.
185
+ # Doxygen uses this value to replace tabs by spaces in code fragments.
186
+
187
+ TAB_SIZE = 8
188
+
189
+ # This tag can be used to specify a number of aliases that acts
190
+ # as commands in the documentation. An alias has the form "name=value".
191
+ # For example adding "sideeffect=\par Side Effects:\n" will allow you to
192
+ # put the command \sideeffect (or @sideeffect) in the documentation, which
193
+ # will result in a user-defined paragraph with heading "Side Effects:".
194
+ # You can put \n's in the value part of an alias to insert newlines.
195
+
196
+ ALIASES =
197
+
198
+ # This tag can be used to specify a number of word-keyword mappings (TCL only).
199
+ # A mapping has the form "name=value". For example adding
200
+ # "class=itcl::class" will allow you to use the command class in the
201
+ # itcl::class meaning.
202
+
203
+ TCL_SUBST =
204
+
205
+ # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
206
+ # sources only. Doxygen will then generate output that is more tailored for C.
207
+ # For instance, some of the names that are used will be different. The list
208
+ # of all members will be omitted, etc.
209
+
210
+ OPTIMIZE_OUTPUT_FOR_C = NO
211
+
212
+ # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
213
+ # sources only. Doxygen will then generate output that is more tailored for
214
+ # Java. For instance, namespaces will be presented as packages, qualified
215
+ # scopes will look different, etc.
216
+
217
+ OPTIMIZE_OUTPUT_JAVA = NO
218
+
219
+ # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
220
+ # sources only. Doxygen will then generate output that is more tailored for
221
+ # Fortran.
222
+
223
+ OPTIMIZE_FOR_FORTRAN = NO
224
+
225
+ # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
226
+ # sources. Doxygen will then generate output that is tailored for
227
+ # VHDL.
228
+
229
+ OPTIMIZE_OUTPUT_VHDL = NO
230
+
231
+ # Doxygen selects the parser to use depending on the extension of the files it
232
+ # parses. With this tag you can assign which parser to use for a given extension.
233
+ # Doxygen has a built-in mapping, but you can override or extend it using this
234
+ # tag. The format is ext=language, where ext is a file extension, and language
235
+ # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
236
+ # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
237
+ # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
238
+ # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
239
+ # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
240
+
241
+ EXTENSION_MAPPING =
242
+
243
+ # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
244
+ # to include (a tag file for) the STL sources as input, then you should
245
+ # set this tag to YES in order to let doxygen match functions declarations and
246
+ # definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
247
+ # func(std::string) {}). This also makes the inheritance and collaboration
248
+ # diagrams that involve STL classes more complete and accurate.
249
+
250
+ BUILTIN_STL_SUPPORT = NO
251
+
252
+ # If you use Microsoft's C++/CLI language, you should set this option to YES to
253
+ # enable parsing support.
254
+
255
+ CPP_CLI_SUPPORT = NO
256
+
257
+ # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
258
+ # Doxygen will parse them like normal C++ but will assume all classes use public
259
+ # instead of private inheritance when no explicit protection keyword is present.
260
+
261
+ SIP_SUPPORT = NO
262
+
263
+ # For Microsoft's IDL there are propget and propput attributes to indicate getter
264
+ # and setter methods for a property. Setting this option to YES (the default)
265
+ # will make doxygen replace the get and set methods by a property in the
266
+ # documentation. This will only work if the methods are indeed getting or
267
+ # setting a simple type. If this is not the case, or you want to show the
268
+ # methods anyway, you should set this option to NO.
269
+
270
+ IDL_PROPERTY_SUPPORT = YES
271
+
272
+ # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
273
+ # tag is set to YES, then doxygen will reuse the documentation of the first
274
+ # member in the group (if any) for the other members of the group. By default
275
+ # all members of a group must be documented explicitly.
276
+
277
+ DISTRIBUTE_GROUP_DOC = NO
278
+
279
+ # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
280
+ # the same type (for instance a group of public functions) to be put as a
281
+ # subgroup of that type (e.g. under the Public Functions section). Set it to
282
+ # NO to prevent subgrouping. Alternatively, this can be done per class using
283
+ # the \nosubgrouping command.
284
+
285
+ SUBGROUPING = YES
286
+
287
+ # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
288
+ # unions are shown inside the group in which they are included (e.g. using
289
+ # @ingroup) instead of on a separate page (for HTML and Man pages) or
290
+ # section (for LaTeX and RTF).
291
+
292
+ INLINE_GROUPED_CLASSES = NO
293
+
294
+ # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
295
+ # unions with only public data fields will be shown inline in the documentation
296
+ # of the scope in which they are defined (i.e. file, namespace, or group
297
+ # documentation), provided this scope is documented. If set to NO (the default),
298
+ # structs, classes, and unions are shown on a separate page (for HTML and Man
299
+ # pages) or section (for LaTeX and RTF).
300
+
301
+ INLINE_SIMPLE_STRUCTS = NO
302
+
303
+ # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
304
+ # is documented as struct, union, or enum with the name of the typedef. So
305
+ # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
306
+ # with name TypeT. When disabled the typedef will appear as a member of a file,
307
+ # namespace, or class. And the struct will be named TypeS. This can typically
308
+ # be useful for C code in case the coding convention dictates that all compound
309
+ # types are typedef'ed and only the typedef is referenced, never the tag name.
310
+
311
+ TYPEDEF_HIDES_STRUCT = NO
312
+
313
+ # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
314
+ # determine which symbols to keep in memory and which to flush to disk.
315
+ # When the cache is full, less often used symbols will be written to disk.
316
+ # For small to medium size projects (<1000 input files) the default value is
317
+ # probably good enough. For larger projects a too small cache size can cause
318
+ # doxygen to be busy swapping symbols to and from disk most of the time
319
+ # causing a significant performance penalty.
320
+ # If the system has enough physical memory increasing the cache will improve the
321
+ # performance by keeping more symbols in memory. Note that the value works on
322
+ # a logarithmic scale so increasing the size by one will roughly double the
323
+ # memory usage. The cache size is given by this formula:
324
+ # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
325
+ # corresponding to a cache size of 2^16 = 65536 symbols.
326
+
327
+ SYMBOL_CACHE_SIZE = 0
328
+
329
+ # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
330
+ # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
331
+ # their name and scope. Since this can be an expensive process and often the
332
+ # same symbol appear multiple times in the code, doxygen keeps a cache of
333
+ # pre-resolved symbols. If the cache is too small doxygen will become slower.
334
+ # If the cache is too large, memory is wasted. The cache size is given by this
335
+ # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
336
+ # corresponding to a cache size of 2^16 = 65536 symbols.
337
+
338
+ LOOKUP_CACHE_SIZE = 0
339
+
340
+ #---------------------------------------------------------------------------
341
+ # Build related configuration options
342
+ #---------------------------------------------------------------------------
343
+
344
+ # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
345
+ # documentation are documented, even if no documentation was available.
346
+ # Private class members and static file members will be hidden unless
347
+ # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
348
+
349
+ EXTRACT_ALL = YES
350
+
351
+ # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
352
+ # will be included in the documentation.
353
+
354
+ EXTRACT_PRIVATE = YES
355
+
356
+ # If the EXTRACT_STATIC tag is set to YES all static members of a file
357
+ # will be included in the documentation.
358
+
359
+ EXTRACT_STATIC = YES
360
+
361
+ # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
362
+ # defined locally in source files will be included in the documentation.
363
+ # If set to NO only classes defined in header files are included.
364
+
365
+ EXTRACT_LOCAL_CLASSES = NO
366
+
367
+ # This flag is only useful for Objective-C code. When set to YES local
368
+ # methods, which are defined in the implementation section but not in
369
+ # the interface are included in the documentation.
370
+ # If set to NO (the default) only methods in the interface are included.
371
+
372
+ EXTRACT_LOCAL_METHODS = NO
373
+
374
+ # If this flag is set to YES, the members of anonymous namespaces will be
375
+ # extracted and appear in the documentation as a namespace called
376
+ # 'anonymous_namespace{file}', where file will be replaced with the base
377
+ # name of the file that contains the anonymous namespace. By default
378
+ # anonymous namespaces are hidden.
379
+
380
+ EXTRACT_ANON_NSPACES = NO
381
+
382
+ # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
383
+ # undocumented members of documented classes, files or namespaces.
384
+ # If set to NO (the default) these members will be included in the
385
+ # various overviews, but no documentation section is generated.
386
+ # This option has no effect if EXTRACT_ALL is enabled.
387
+
388
+ HIDE_UNDOC_MEMBERS = NO
389
+
390
+ # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
391
+ # undocumented classes that are normally visible in the class hierarchy.
392
+ # If set to NO (the default) these classes will be included in the various
393
+ # overviews. This option has no effect if EXTRACT_ALL is enabled.
394
+
395
+ HIDE_UNDOC_CLASSES = NO
396
+
397
+ # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
398
+ # friend (class|struct|union) declarations.
399
+ # If set to NO (the default) these declarations will be included in the
400
+ # documentation.
401
+
402
+ HIDE_FRIEND_COMPOUNDS = NO
403
+
404
+ # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
405
+ # documentation blocks found inside the body of a function.
406
+ # If set to NO (the default) these blocks will be appended to the
407
+ # function's detailed documentation block.
408
+
409
+ HIDE_IN_BODY_DOCS = NO
410
+
411
+ # The INTERNAL_DOCS tag determines if documentation
412
+ # that is typed after a \internal command is included. If the tag is set
413
+ # to NO (the default) then the documentation will be excluded.
414
+ # Set it to YES to include the internal documentation.
415
+
416
+ INTERNAL_DOCS = NO
417
+
418
+ # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
419
+ # file names in lower-case letters. If set to YES upper-case letters are also
420
+ # allowed. This is useful if you have classes or files whose names only differ
421
+ # in case and if your file system supports case sensitive file names. Windows
422
+ # and Mac users are advised to set this option to NO.
423
+
424
+ CASE_SENSE_NAMES = YES
425
+
426
+ # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
427
+ # will show members with their full class and namespace scopes in the
428
+ # documentation. If set to YES the scope will be hidden.
429
+
430
+ HIDE_SCOPE_NAMES = NO
431
+
432
+ # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
433
+ # will put a list of the files that are included by a file in the documentation
434
+ # of that file.
435
+
436
+ SHOW_INCLUDE_FILES = YES
437
+
438
+ # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
439
+ # will list include files with double quotes in the documentation
440
+ # rather than with sharp brackets.
441
+
442
+ FORCE_LOCAL_INCLUDES = NO
443
+
444
+ # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
445
+ # is inserted in the documentation for inline members.
446
+
447
+ INLINE_INFO = YES
448
+
449
+ # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
450
+ # will sort the (detailed) documentation of file and class members
451
+ # alphabetically by member name. If set to NO the members will appear in
452
+ # declaration order.
453
+
454
+ SORT_MEMBER_DOCS = YES
455
+
456
+ # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
457
+ # brief documentation of file, namespace and class members alphabetically
458
+ # by member name. If set to NO (the default) the members will appear in
459
+ # declaration order.
460
+
461
+ SORT_BRIEF_DOCS = NO
462
+
463
+ # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
464
+ # will sort the (brief and detailed) documentation of class members so that
465
+ # constructors and destructors are listed first. If set to NO (the default)
466
+ # the constructors will appear in the respective orders defined by
467
+ # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
468
+ # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
469
+ # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
470
+
471
+ SORT_MEMBERS_CTORS_1ST = NO
472
+
473
+ # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
474
+ # hierarchy of group names into alphabetical order. If set to NO (the default)
475
+ # the group names will appear in their defined order.
476
+
477
+ SORT_GROUP_NAMES = NO
478
+
479
+ # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
480
+ # sorted by fully-qualified names, including namespaces. If set to
481
+ # NO (the default), the class list will be sorted only by class name,
482
+ # not including the namespace part.
483
+ # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
484
+ # Note: This option applies only to the class list, not to the
485
+ # alphabetical list.
486
+
487
+ SORT_BY_SCOPE_NAME = NO
488
+
489
+ # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
490
+ # do proper type resolution of all parameters of a function it will reject a
491
+ # match between the prototype and the implementation of a member function even
492
+ # if there is only one candidate or it is obvious which candidate to choose
493
+ # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
494
+ # will still accept a match between prototype and implementation in such cases.
495
+
496
+ STRICT_PROTO_MATCHING = NO
497
+
498
+ # The GENERATE_TODOLIST tag can be used to enable (YES) or
499
+ # disable (NO) the todo list. This list is created by putting \todo
500
+ # commands in the documentation.
501
+
502
+ GENERATE_TODOLIST = YES
503
+
504
+ # The GENERATE_TESTLIST tag can be used to enable (YES) or
505
+ # disable (NO) the test list. This list is created by putting \test
506
+ # commands in the documentation.
507
+
508
+ GENERATE_TESTLIST = YES
509
+
510
+ # The GENERATE_BUGLIST tag can be used to enable (YES) or
511
+ # disable (NO) the bug list. This list is created by putting \bug
512
+ # commands in the documentation.
513
+
514
+ GENERATE_BUGLIST = YES
515
+
516
+ # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
517
+ # disable (NO) the deprecated list. This list is created by putting
518
+ # \deprecated commands in the documentation.
519
+
520
+ GENERATE_DEPRECATEDLIST= YES
521
+
522
+ # The ENABLED_SECTIONS tag can be used to enable conditional
523
+ # documentation sections, marked by \if sectionname ... \endif.
524
+
525
+ ENABLED_SECTIONS =
526
+
527
+ # The MAX_INITIALIZER_LINES tag determines the maximum number of lines
528
+ # the initial value of a variable or macro consists of for it to appear in
529
+ # the documentation. If the initializer consists of more lines than specified
530
+ # here it will be hidden. Use a value of 0 to hide initializers completely.
531
+ # The appearance of the initializer of individual variables and macros in the
532
+ # documentation can be controlled using \showinitializer or \hideinitializer
533
+ # command in the documentation regardless of this setting.
534
+
535
+ MAX_INITIALIZER_LINES = 30
536
+
537
+ # Set the SHOW_USED_FILES tag to NO to disable the list of files generated
538
+ # at the bottom of the documentation of classes and structs. If set to YES the
539
+ # list will mention the files that were used to generate the documentation.
540
+
541
+ SHOW_USED_FILES = YES
542
+
543
+ # If the sources in your project are distributed over multiple directories
544
+ # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
545
+ # in the documentation. The default is NO.
546
+
547
+ SHOW_DIRECTORIES = NO
548
+
549
+ # Set the SHOW_FILES tag to NO to disable the generation of the Files page.
550
+ # This will remove the Files entry from the Quick Index and from the
551
+ # Folder Tree View (if specified). The default is YES.
552
+
553
+ SHOW_FILES = YES
554
+
555
+ # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
556
+ # Namespaces page.
557
+ # This will remove the Namespaces entry from the Quick Index
558
+ # and from the Folder Tree View (if specified). The default is YES.
559
+
560
+ SHOW_NAMESPACES = YES
561
+
562
+ # The FILE_VERSION_FILTER tag can be used to specify a program or script that
563
+ # doxygen should invoke to get the current version for each file (typically from
564
+ # the version control system). Doxygen will invoke the program by executing (via
565
+ # popen()) the command <command> <input-file>, where <command> is the value of
566
+ # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
567
+ # provided by doxygen. Whatever the program writes to standard output
568
+ # is used as the file version. See the manual for examples.
569
+
570
+ FILE_VERSION_FILTER =
571
+
572
+ # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
573
+ # by doxygen. The layout file controls the global structure of the generated
574
+ # output files in an output format independent way. To create the layout file
575
+ # that represents doxygen's defaults, run doxygen with the -l option.
576
+ # You can optionally specify a file name after the option, if omitted
577
+ # DoxygenLayout.xml will be used as the name of the layout file.
578
+
579
+ LAYOUT_FILE =
580
+
581
+ # The CITE_BIB_FILES tag can be used to specify one or more bib files
582
+ # containing the references data. This must be a list of .bib files. The
583
+ # .bib extension is automatically appended if omitted. Using this command
584
+ # requires the bibtex tool to be installed. See also
585
+ # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
586
+ # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
587
+ # feature you need bibtex and perl available in the search path.
588
+
589
+ CITE_BIB_FILES =
590
+
591
+ #---------------------------------------------------------------------------
592
+ # configuration options related to warning and progress messages
593
+ #---------------------------------------------------------------------------
594
+
595
+ # The QUIET tag can be used to turn on/off the messages that are generated
596
+ # by doxygen. Possible values are YES and NO. If left blank NO is used.
597
+
598
+ QUIET = NO
599
+
600
+ # The WARNINGS tag can be used to turn on/off the warning messages that are
601
+ # generated by doxygen. Possible values are YES and NO. If left blank
602
+ # NO is used.
603
+
604
+ WARNINGS = YES
605
+
606
+ # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
607
+ # for undocumented members. If EXTRACT_ALL is set to YES then this flag will
608
+ # automatically be disabled.
609
+
610
+ WARN_IF_UNDOCUMENTED = YES
611
+
612
+ # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
613
+ # potential errors in the documentation, such as not documenting some
614
+ # parameters in a documented function, or documenting parameters that
615
+ # don't exist or using markup commands wrongly.
616
+
617
+ WARN_IF_DOC_ERROR = YES
618
+
619
+ # The WARN_NO_PARAMDOC option can be enabled to get warnings for
620
+ # functions that are documented, but have no documentation for their parameters
621
+ # or return value. If set to NO (the default) doxygen will only warn about
622
+ # wrong or incomplete parameter documentation, but not about the absence of
623
+ # documentation.
624
+
625
+ WARN_NO_PARAMDOC = NO
626
+
627
+ # The WARN_FORMAT tag determines the format of the warning messages that
628
+ # doxygen can produce. The string should contain the $file, $line, and $text
629
+ # tags, which will be replaced by the file and line number from which the
630
+ # warning originated and the warning text. Optionally the format may contain
631
+ # $version, which will be replaced by the version of the file (if it could
632
+ # be obtained via FILE_VERSION_FILTER)
633
+
634
+ WARN_FORMAT = "$file:$line: $text"
635
+
636
+ # The WARN_LOGFILE tag can be used to specify a file to which warning
637
+ # and error messages should be written. If left blank the output is written
638
+ # to stderr.
639
+
640
+ WARN_LOGFILE =
641
+
642
+ #---------------------------------------------------------------------------
643
+ # configuration options related to the input files
644
+ #---------------------------------------------------------------------------
645
+
646
+ # The INPUT tag can be used to specify the files and/or directories that contain
647
+ # documented source files. You may enter file names like "myfile.cpp" or
648
+ # directories like "/usr/src/myproject". Separate the files or directories
649
+ # with spaces.
650
+
651
+ INPUT = moses
652
+
653
+ # This tag can be used to specify the character encoding of the source files
654
+ # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
655
+ # also the default input encoding. Doxygen uses libiconv (or the iconv built
656
+ # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
657
+ # the list of possible encodings.
658
+
659
+ INPUT_ENCODING = UTF-8
660
+
661
+ # If the value of the INPUT tag contains directories, you can use the
662
+ # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
663
+ # and *.h) to filter out the source-files in the directories. If left
664
+ # blank the following patterns are tested:
665
+ # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
666
+ # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
667
+ # *.f90 *.f *.for *.vhd *.vhdl
668
+
669
+ FILE_PATTERNS =
670
+
671
+ # The RECURSIVE tag can be used to specify whether or not subdirectories
672
+ # should be searched for input files as well. Possible values are YES and NO.
673
+ # If left blank NO is used.
674
+
675
+ RECURSIVE = YES
676
+
677
+ # The EXCLUDE tag can be used to specify files and/or directories that should be
678
+ # excluded from the INPUT source files. This way you can easily exclude a
679
+ # subdirectory from a directory tree whose root is specified with the INPUT tag.
680
+ # Note that relative paths are relative to the directory from which doxygen is
681
+ # run.
682
+
683
+ EXCLUDE = opt regtest doxy
684
+
685
+ # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
686
+ # directories that are symbolic links (a Unix file system feature) are excluded
687
+ # from the input.
688
+
689
+ EXCLUDE_SYMLINKS = NO
690
+
691
+ # If the value of the INPUT tag contains directories, you can use the
692
+ # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
693
+ # certain files from those directories. Note that the wildcards are matched
694
+ # against the file with absolute path, so to exclude all test directories
695
+ # for example use the pattern */test/*
696
+
697
+ EXCLUDE_PATTERNS = opt/* regtest/* doxy/*
698
+
699
+ # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
700
+ # (namespaces, classes, functions, etc.) that should be excluded from the
701
+ # output. The symbol name can be a fully qualified name, a word, or if the
702
+ # wildcard * is used, a substring. Examples: ANamespace, AClass,
703
+ # AClass::ANamespace, ANamespace::*Test
704
+
705
+ EXCLUDE_SYMBOLS =
706
+
707
+ # The EXAMPLE_PATH tag can be used to specify one or more files or
708
+ # directories that contain example code fragments that are included (see
709
+ # the \include command).
710
+
711
+ EXAMPLE_PATH =
712
+
713
+ # If the value of the EXAMPLE_PATH tag contains directories, you can use the
714
+ # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
715
+ # and *.h) to filter out the source-files in the directories. If left
716
+ # blank all files are included.
717
+
718
+ EXAMPLE_PATTERNS =
719
+
720
+ # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
721
+ # searched for input files to be used with the \include or \dontinclude
722
+ # commands irrespective of the value of the RECURSIVE tag.
723
+ # Possible values are YES and NO. If left blank NO is used.
724
+
725
+ EXAMPLE_RECURSIVE = NO
726
+
727
+ # The IMAGE_PATH tag can be used to specify one or more files or
728
+ # directories that contain image that are included in the documentation (see
729
+ # the \image command).
730
+
731
+ IMAGE_PATH =
732
+
733
+ # The INPUT_FILTER tag can be used to specify a program that doxygen should
734
+ # invoke to filter for each input file. Doxygen will invoke the filter program
735
+ # by executing (via popen()) the command <filter> <input-file>, where <filter>
736
+ # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
737
+ # input file. Doxygen will then use the output that the filter program writes
738
+ # to standard output.
739
+ # If FILTER_PATTERNS is specified, this tag will be
740
+ # ignored.
741
+
742
+ INPUT_FILTER =
743
+
744
+ # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
745
+ # basis.
746
+ # Doxygen will compare the file name with each pattern and apply the
747
+ # filter if there is a match.
748
+ # The filters are a list of the form:
749
+ # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
750
+ # info on how filters are used. If FILTER_PATTERNS is empty or if
751
+ # none of the patterns match the file name, INPUT_FILTER is applied.
752
+
753
+ FILTER_PATTERNS =
754
+
755
+ # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
756
+ # INPUT_FILTER) will be used to filter the input files when producing source
757
+ # files to browse (i.e. when SOURCE_BROWSER is set to YES).
758
+
759
+ FILTER_SOURCE_FILES = NO
760
+
761
+ # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
762
+ # pattern. A pattern will override the setting for FILTER_PATTERN (if any)
763
+ # and it is also possible to disable source filtering for a specific pattern
764
+ # using *.ext= (so without naming a filter). This option only has effect when
765
+ # FILTER_SOURCE_FILES is enabled.
766
+
767
+ FILTER_SOURCE_PATTERNS =
768
+
769
+ #---------------------------------------------------------------------------
770
+ # configuration options related to source browsing
771
+ #---------------------------------------------------------------------------
772
+
773
+ # If the SOURCE_BROWSER tag is set to YES then a list of source files will
774
+ # be generated. Documented entities will be cross-referenced with these sources.
775
+ # Note: To get rid of all source code in the generated output, make sure also
776
+ # VERBATIM_HEADERS is set to NO.
777
+
778
+ SOURCE_BROWSER = NO
779
+
780
+ # Setting the INLINE_SOURCES tag to YES will include the body
781
+ # of functions and classes directly in the documentation.
782
+
783
+ INLINE_SOURCES = NO
784
+
785
+ # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
786
+ # doxygen to hide any special comment blocks from generated source code
787
+ # fragments. Normal C and C++ comments will always remain visible.
788
+
789
+ STRIP_CODE_COMMENTS = YES
790
+
791
+ # If the REFERENCED_BY_RELATION tag is set to YES
792
+ # then for each documented function all documented
793
+ # functions referencing it will be listed.
794
+
795
+ REFERENCED_BY_RELATION = NO
796
+
797
+ # If the REFERENCES_RELATION tag is set to YES
798
+ # then for each documented function all documented entities
799
+ # called/used by that function will be listed.
800
+
801
+ REFERENCES_RELATION = NO
802
+
803
+ # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
804
+ # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
805
+ # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
806
+ # link to the source code.
807
+ # Otherwise they will link to the documentation.
808
+
809
+ REFERENCES_LINK_SOURCE = YES
810
+
811
+ # If the USE_HTAGS tag is set to YES then the references to source code
812
+ # will point to the HTML generated by the htags(1) tool instead of doxygen
813
+ # built-in source browser. The htags tool is part of GNU's global source
814
+ # tagging system (see http://www.gnu.org/software/global/global.html). You
815
+ # will need version 4.8.6 or higher.
816
+
817
+ USE_HTAGS = NO
818
+
819
+ # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
820
+ # will generate a verbatim copy of the header file for each class for
821
+ # which an include is specified. Set to NO to disable this.
822
+
823
+ VERBATIM_HEADERS = YES
824
+
825
+ #---------------------------------------------------------------------------
826
+ # configuration options related to the alphabetical class index
827
+ #---------------------------------------------------------------------------
828
+
829
+ # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
830
+ # of all compounds will be generated. Enable this if the project
831
+ # contains a lot of classes, structs, unions or interfaces.
832
+
833
+ ALPHABETICAL_INDEX = YES
834
+
835
+ # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
836
+ # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
837
+ # in which this list will be split (can be a number in the range [1..20])
838
+
839
+ COLS_IN_ALPHA_INDEX = 5
840
+
841
+ # In case all classes in a project start with a common prefix, all
842
+ # classes will be put under the same header in the alphabetical index.
843
+ # The IGNORE_PREFIX tag can be used to specify one or more prefixes that
844
+ # should be ignored while generating the index headers.
845
+
846
+ IGNORE_PREFIX =
847
+
848
+ #---------------------------------------------------------------------------
849
+ # configuration options related to the HTML output
850
+ #---------------------------------------------------------------------------
851
+
852
+ # If the GENERATE_HTML tag is set to YES (the default) Doxygen will
853
+ # generate HTML output.
854
+
855
+ GENERATE_HTML = YES
856
+
857
+ # The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
858
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
859
+ # put in front of it. If left blank `html' will be used as the default path.
860
+
861
+ HTML_OUTPUT = html
862
+
863
+ # The HTML_FILE_EXTENSION tag can be used to specify the file extension for
864
+ # each generated HTML page (for example: .htm,.php,.asp). If it is left blank
865
+ # doxygen will generate files with .html extension.
866
+
867
+ HTML_FILE_EXTENSION = .html
868
+
869
+ # The HTML_HEADER tag can be used to specify a personal HTML header for
870
+ # each generated HTML page. If it is left blank doxygen will generate a
871
+ # standard header. Note that when using a custom header you are responsible
872
+ # for the proper inclusion of any scripts and style sheets that doxygen
873
+ # needs, which is dependent on the configuration options used.
874
+ # It is advised to generate a default header using "doxygen -w html
875
+ # header.html footer.html stylesheet.css YourConfigFile" and then modify
876
+ # that header. Note that the header is subject to change so you typically
877
+ # have to redo this when upgrading to a newer version of doxygen or when
878
+ # changing the value of configuration settings such as GENERATE_TREEVIEW!
879
+
880
+ HTML_HEADER =
881
+
882
+ # The HTML_FOOTER tag can be used to specify a personal HTML footer for
883
+ # each generated HTML page. If it is left blank doxygen will generate a
884
+ # standard footer.
885
+
886
+ HTML_FOOTER =
887
+
888
+ # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
889
+ # style sheet that is used by each HTML page. It can be used to
890
+ # fine-tune the look of the HTML output. If the tag is left blank doxygen
891
+ # will generate a default style sheet. Note that doxygen will try to copy
892
+ # the style sheet file to the HTML output directory, so don't put your own
893
+ # style sheet in the HTML output directory as well, or it will be erased!
894
+
895
+ HTML_STYLESHEET =
896
+
897
+ # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
898
+ # other source files which should be copied to the HTML output directory. Note
899
+ # that these files will be copied to the base HTML output directory. Use the
900
+ # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
901
+ # files. In the HTML_STYLESHEET file, use the file name only. Also note that
902
+ # the files will be copied as-is; there are no commands or markers available.
903
+
904
+ HTML_EXTRA_FILES =
905
+
906
+ # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
907
+ # Doxygen will adjust the colors in the style sheet and background images
908
+ # according to this color. Hue is specified as an angle on a colorwheel,
909
+ # see http://en.wikipedia.org/wiki/Hue for more information.
910
+ # For instance the value 0 represents red, 60 is yellow, 120 is green,
911
+ # 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
912
+ # The allowed range is 0 to 359.
913
+
914
+ HTML_COLORSTYLE_HUE = 220
915
+
916
+ # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
917
+ # the colors in the HTML output. For a value of 0 the output will use
918
+ # grayscales only. A value of 255 will produce the most vivid colors.
919
+
920
+ HTML_COLORSTYLE_SAT = 100
921
+
922
+ # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
923
+ # the luminance component of the colors in the HTML output. Values below
924
+ # 100 gradually make the output lighter, whereas values above 100 make
925
+ # the output darker. The value divided by 100 is the actual gamma applied,
926
+ # so 80 represents a gamma of 0.8. The value 220 represents a gamma of 2.2,
927
+ # and 100 does not change the gamma.
928
+
929
+ HTML_COLORSTYLE_GAMMA = 80
930
+
931
+ # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
932
+ # page will contain the date and time when the page was generated. Setting
933
+ # this to NO can help when comparing the output of multiple runs.
934
+
935
+ HTML_TIMESTAMP = YES
936
+
937
+ # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
938
+ # files or namespaces will be aligned in HTML using tables. If set to
939
+ # NO a bullet list will be used.
940
+
941
+ HTML_ALIGN_MEMBERS = YES
942
+
943
+ # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
944
+ # documentation will contain sections that can be hidden and shown after the
945
+ # page has loaded. For this to work a browser that supports
946
+ # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
947
+ # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
948
+
949
+ HTML_DYNAMIC_SECTIONS = NO
950
+
951
+ # If the GENERATE_DOCSET tag is set to YES, additional index files
952
+ # will be generated that can be used as input for Apple's Xcode 3
953
+ # integrated development environment, introduced with OSX 10.5 (Leopard).
954
+ # To create a documentation set, doxygen will generate a Makefile in the
955
+ # HTML output directory. Running make will produce the docset in that
956
+ # directory and running "make install" will install the docset in
957
+ # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
958
+ # it at startup.
959
+ # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
960
+ # for more information.
961
+
962
+ GENERATE_DOCSET = NO
963
+
964
+ # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
965
+ # feed. A documentation feed provides an umbrella under which multiple
966
+ # documentation sets from a single provider (such as a company or product suite)
967
+ # can be grouped.
968
+
969
+ DOCSET_FEEDNAME = "Doxygen generated docs"
970
+
971
+ # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
972
+ # should uniquely identify the documentation set bundle. This should be a
973
+ # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
974
+ # will append .docset to the name.
975
+
976
+ DOCSET_BUNDLE_ID = org.doxygen.Project
977
+
978
+ # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
979
+ # the documentation publisher. This should be a reverse domain-name style
980
+ # string, e.g. com.mycompany.MyDocSet.documentation.
981
+
982
+ DOCSET_PUBLISHER_ID = org.doxygen.Publisher
983
+
984
+ # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
985
+
986
+ DOCSET_PUBLISHER_NAME = Publisher
987
+
988
+ # If the GENERATE_HTMLHELP tag is set to YES, additional index files
989
+ # will be generated that can be used as input for tools like the
990
+ # Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
991
+ # of the generated HTML documentation.
992
+
993
+ GENERATE_HTMLHELP = NO
994
+
995
+ # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
996
+ # be used to specify the file name of the resulting .chm file. You
997
+ # can add a path in front of the file if the result should not be
998
+ # written to the html output directory.
999
+
1000
+ CHM_FILE =
1001
+
1002
+ # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
1003
+ # be used to specify the location (absolute path including file name) of
1004
+ # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
1005
+ # the HTML help compiler on the generated index.hhp.
1006
+
1007
+ HHC_LOCATION =
1008
+
1009
+ # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
1010
+ # controls if a separate .chi index file is generated (YES) or that
1011
+ # it should be included in the master .chm file (NO).
1012
+
1013
+ GENERATE_CHI = NO
1014
+
1015
+ # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
1016
+ # is used to encode HtmlHelp index (hhk), content (hhc) and project file
1017
+ # content.
1018
+
1019
+ CHM_INDEX_ENCODING =
1020
+
1021
+ # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
1022
+ # controls whether a binary table of contents is generated (YES) or a
1023
+ # normal table of contents (NO) in the .chm file.
1024
+
1025
+ BINARY_TOC = NO
1026
+
1027
+ # The TOC_EXPAND flag can be set to YES to add extra items for group members
1028
+ # to the contents of the HTML help documentation and to the tree view.
1029
+
1030
+ TOC_EXPAND = NO
1031
+
1032
+ # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
1033
+ # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
1034
+ # that can be used as input for Qt's qhelpgenerator to generate a
1035
+ # Qt Compressed Help (.qch) of the generated HTML documentation.
1036
+
1037
+ GENERATE_QHP = NO
1038
+
1039
+ # If the QHG_LOCATION tag is specified, the QCH_FILE tag can
1040
+ # be used to specify the file name of the resulting .qch file.
1041
+ # The path specified is relative to the HTML output folder.
1042
+
1043
+ QCH_FILE =
1044
+
1045
+ # The QHP_NAMESPACE tag specifies the namespace to use when generating
1046
+ # Qt Help Project output. For more information please see
1047
+ # http://doc.trolltech.com/qthelpproject.html#namespace
1048
+
1049
+ QHP_NAMESPACE = org.doxygen.Project
1050
+
1051
+ # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
1052
+ # Qt Help Project output. For more information please see
1053
+ # http://doc.trolltech.com/qthelpproject.html#virtual-folders
1054
+
1055
+ QHP_VIRTUAL_FOLDER = doc
1056
+
1057
+ # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
1058
+ # add. For more information please see
1059
+ # http://doc.trolltech.com/qthelpproject.html#custom-filters
1060
+
1061
+ QHP_CUST_FILTER_NAME =
1062
+
1063
+ # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
1064
+ # custom filter to add. For more information please see
1065
+ # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
1066
+ # Qt Help Project / Custom Filters</a>.
1067
+
1068
+ QHP_CUST_FILTER_ATTRS =
1069
+
1070
+ # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
1071
+ # project's
1072
+ # filter section matches.
1073
+ # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
1074
+ # Qt Help Project / Filter Attributes</a>.
1075
+
1076
+ QHP_SECT_FILTER_ATTRS =
1077
+
1078
+ # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
1079
+ # be used to specify the location of Qt's qhelpgenerator.
1080
+ # If non-empty doxygen will try to run qhelpgenerator on the generated
1081
+ # .qhp file.
1082
+
1083
+ QHG_LOCATION =
1084
+
1085
+ # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
1086
+ # will be generated, which together with the HTML files, form an Eclipse help
1087
+ # plugin. To install this plugin and make it available under the help contents
1088
+ # menu in Eclipse, the contents of the directory containing the HTML and XML
1089
+ # files needs to be copied into the plugins directory of eclipse. The name of
1090
+ # the directory within the plugins directory should be the same as
1091
+ # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
1092
+ # the help appears.
1093
+
1094
+ GENERATE_ECLIPSEHELP = NO
1095
+
1096
+ # A unique identifier for the eclipse help plugin. When installing the plugin
1097
+ # the directory name containing the HTML and XML files should also have
1098
+ # this name.
1099
+
1100
+ ECLIPSE_DOC_ID = org.doxygen.Project
1101
+
1102
+ # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
1103
+ # at top of each HTML page. The value NO (the default) enables the index and
1104
+ # the value YES disables it. Since the tabs have the same information as the
1105
+ # navigation tree you can set this option to NO if you already set
1106
+ # GENERATE_TREEVIEW to YES.
1107
+
1108
+ DISABLE_INDEX = NO
1109
+
1110
+ # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
1111
+ # structure should be generated to display hierarchical information.
1112
+ # If the tag value is set to YES, a side panel will be generated
1113
+ # containing a tree-like index structure (just like the one that
1114
+ # is generated for HTML Help). For this to work a browser that supports
1115
+ # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
1116
+ # Windows users are probably better off using the HTML help feature.
1117
+ # Since the tree basically has the same information as the tab index you
1118
+ # could consider to set DISABLE_INDEX to NO when enabling this option.
1119
+
1120
+ GENERATE_TREEVIEW = NO
1121
+
1122
+ # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
1123
+ # (range [0,1..20]) that doxygen will group on one line in the generated HTML
1124
+ # documentation. Note that a value of 0 will completely suppress the enum
1125
+ # values from appearing in the overview section.
1126
+
1127
+ ENUM_VALUES_PER_LINE = 4
1128
+
1129
+ # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
1130
+ # and Class Hierarchy pages using a tree view instead of an ordered list.
1131
+
1132
+ USE_INLINE_TREES = NO
1133
+
1134
+ # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
1135
+ # used to set the initial width (in pixels) of the frame in which the tree
1136
+ # is shown.
1137
+
1138
+ TREEVIEW_WIDTH = 250
1139
+
1140
+ # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
1141
+ # links to external symbols imported via tag files in a separate window.
1142
+
1143
+ EXT_LINKS_IN_WINDOW = NO
1144
+
1145
+ # Use this tag to change the font size of Latex formulas included
1146
+ # as images in the HTML documentation. The default is 10. Note that
1147
+ # when you change the font size after a successful doxygen run you need
1148
+ # to manually remove any form_*.png images from the HTML output directory
1149
+ # to force them to be regenerated.
1150
+
1151
+ FORMULA_FONTSIZE = 10
1152
+
1153
+ # Use the FORMULA_TRANSPARENT tag to determine whether or not the images
1154
+ # generated for formulas are transparent PNGs. Transparent PNGs are
1155
+ # not supported properly for IE 6.0, but are supported on all modern browsers.
1156
+ # Note that when changing this option you need to delete any form_*.png files
1157
+ # in the HTML output before the changes have effect.
1158
+
1159
+ FORMULA_TRANSPARENT = YES
1160
+
1161
+ # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
1162
+ # (see http://www.mathjax.org) which uses client side Javascript for the
1163
+ # rendering instead of using prerendered bitmaps. Use this if you do not
1164
+ # have LaTeX installed or if you want the formulas to look prettier in the HTML
1165
+ # output. When enabled you also need to install MathJax separately and
1166
+ # configure the path to it using the MATHJAX_RELPATH option.
1167
+
1168
+ USE_MATHJAX = NO
1169
+
1170
+ # When MathJax is enabled you need to specify the location relative to the
1171
+ # HTML output directory using the MATHJAX_RELPATH option. The destination
1172
+ # directory should contain the MathJax.js script. For instance, if the mathjax
1173
+ # directory is located at the same level as the HTML output directory, then
1174
+ # MATHJAX_RELPATH should be ../mathjax. The default value points to the
1175
+ # mathjax.org site, so you can quickly see the result without installing
1176
+ # MathJax, but it is strongly recommended to install a local copy of MathJax
1177
+ # before deployment.
1178
+
1179
+ MATHJAX_RELPATH = http://www.mathjax.org/mathjax
1180
+
1181
+ # The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension
1182
+ # names that should be enabled during MathJax rendering.
1183
+
1184
+ MATHJAX_EXTENSIONS =
1185
+
1186
+ # When the SEARCHENGINE tag is enabled doxygen will generate a search box
1187
+ # for the HTML output. The underlying search engine uses javascript
1188
+ # and DHTML and should work on any modern browser. Note that when using
1189
+ # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
1190
+ # (GENERATE_DOCSET) there is already a search function so this one should
1191
+ # typically be disabled. For large projects the javascript based search engine
1192
+ # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
1193
+
1194
+ SEARCHENGINE = YES
1195
+
1196
+ # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
1197
+ # implemented using a PHP enabled web server instead of at the web client
1198
+ # using Javascript. Doxygen will generate the search PHP script and index
1199
+ # file to put on the web server. The advantage of the server
1200
+ # based approach is that it scales better to large projects and allows
1201
+ # full text search. The disadvantages are that it is more difficult to setup
1202
+ # and does not have live searching capabilities.
1203
+
1204
+ SERVER_BASED_SEARCH = NO
1205
+
1206
+ #---------------------------------------------------------------------------
1207
+ # configuration options related to the LaTeX output
1208
+ #---------------------------------------------------------------------------
1209
+
1210
+ # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
1211
+ # generate Latex output.
1212
+
1213
+ GENERATE_LATEX = NO
1214
+
1215
+ # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
1216
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1217
+ # put in front of it. If left blank `latex' will be used as the default path.
1218
+
1219
+ LATEX_OUTPUT = latex
1220
+
1221
+ # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1222
+ # invoked. If left blank `latex' will be used as the default command name.
1223
+ # Note that when enabling USE_PDFLATEX this option is only used for
1224
+ # generating bitmaps for formulas in the HTML output, but not in the
1225
+ # Makefile that is written to the output directory.
1226
+
1227
+ LATEX_CMD_NAME = latex
1228
+
1229
+ # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1230
+ # generate index for LaTeX. If left blank `makeindex' will be used as the
1231
+ # default command name.
1232
+
1233
+ MAKEINDEX_CMD_NAME = makeindex
1234
+
1235
+ # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1236
+ # LaTeX documents. This may be useful for small projects and may help to
1237
+ # save some trees in general.
1238
+
1239
+ COMPACT_LATEX = NO
1240
+
1241
+ # The PAPER_TYPE tag can be used to set the paper type that is used
1242
+ # by the printer. Possible values are: a4, letter, legal and
1243
+ # executive. If left blank a4wide will be used.
1244
+
1245
+ PAPER_TYPE = a4
1246
+
1247
+ # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1248
+ # packages that should be included in the LaTeX output.
1249
+
1250
+ EXTRA_PACKAGES =
1251
+
1252
+ # The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1253
+ # the generated latex document. The header should contain everything until
1254
+ # the first chapter. If it is left blank doxygen will generate a
1255
+ # standard header. Notice: only use this tag if you know what you are doing!
1256
+
1257
+ LATEX_HEADER =
1258
+
1259
+ # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
1260
+ # the generated latex document. The footer should contain everything after
1261
+ # the last chapter. If it is left blank doxygen will generate a
1262
+ # standard footer. Notice: only use this tag if you know what you are doing!
1263
+
1264
+ LATEX_FOOTER =
1265
+
1266
+ # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1267
+ # is prepared for conversion to pdf (using ps2pdf). The pdf file will
1268
+ # contain links (just like the HTML output) instead of page references
1269
+ # This makes the output suitable for online browsing using a pdf viewer.
1270
+
1271
+ PDF_HYPERLINKS = YES
1272
+
1273
+ # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1274
+ # plain latex in the generated Makefile. Set this option to YES to get a
1275
+ # higher quality PDF documentation.
1276
+
1277
+ USE_PDFLATEX = YES
1278
+
1279
+ # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
1280
+ # command to the generated LaTeX files. This will instruct LaTeX to keep
1281
+ # running if errors occur, instead of asking the user for help.
1282
+ # This option is also used when generating formulas in HTML.
1283
+
1284
+ LATEX_BATCHMODE = NO
1285
+
1286
+ # If LATEX_HIDE_INDICES is set to YES then doxygen will not
1287
+ # include the index chapters (such as File Index, Compound Index, etc.)
1288
+ # in the output.
1289
+
1290
+ LATEX_HIDE_INDICES = NO
1291
+
1292
+ # If LATEX_SOURCE_CODE is set to YES then doxygen will include
1293
+ # source code with syntax highlighting in the LaTeX output.
1294
+ # Note that which sources are shown also depends on other settings
1295
+ # such as SOURCE_BROWSER.
1296
+
1297
+ LATEX_SOURCE_CODE = NO
1298
+
1299
+ # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
1300
+ # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
1301
+ # http://en.wikipedia.org/wiki/BibTeX for more info.
1302
+
1303
+ LATEX_BIB_STYLE = plain
1304
+
1305
+ #---------------------------------------------------------------------------
1306
+ # configuration options related to the RTF output
1307
+ #---------------------------------------------------------------------------
1308
+
1309
+ # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
1310
+ # The RTF output is optimized for Word 97 and may not look very pretty with
1311
+ # other RTF readers or editors.
1312
+
1313
+ GENERATE_RTF = NO
1314
+
1315
+ # The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1316
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1317
+ # put in front of it. If left blank `rtf' will be used as the default path.
1318
+
1319
+ RTF_OUTPUT = rtf
1320
+
1321
+ # If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1322
+ # RTF documents. This may be useful for small projects and may help to
1323
+ # save some trees in general.
1324
+
1325
+ COMPACT_RTF = NO
1326
+
1327
+ # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1328
+ # will contain hyperlink fields. The RTF file will
1329
+ # contain links (just like the HTML output) instead of page references.
1330
+ # This makes the output suitable for online browsing using WORD or other
1331
+ # programs which support those fields.
1332
+ # Note: wordpad (write) and others do not support links.
1333
+
1334
+ RTF_HYPERLINKS = NO
1335
+
1336
+ # Load style sheet definitions from file. Syntax is similar to doxygen's
1337
+ # config file, i.e. a series of assignments. You only have to provide
1338
+ # replacements, missing definitions are set to their default value.
1339
+
1340
+ RTF_STYLESHEET_FILE =
1341
+
1342
+ # Set optional variables used in the generation of an rtf document.
1343
+ # Syntax is similar to doxygen's config file.
1344
+
1345
+ RTF_EXTENSIONS_FILE =
1346
+
1347
+ #---------------------------------------------------------------------------
1348
+ # configuration options related to the man page output
1349
+ #---------------------------------------------------------------------------
1350
+
1351
+ # If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1352
+ # generate man pages
1353
+
1354
+ GENERATE_MAN = NO
1355
+
1356
+ # The MAN_OUTPUT tag is used to specify where the man pages will be put.
1357
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1358
+ # put in front of it. If left blank `man' will be used as the default path.
1359
+
1360
+ MAN_OUTPUT = man
1361
+
1362
+ # The MAN_EXTENSION tag determines the extension that is added to
1363
+ # the generated man pages (default is the subroutine's section .3)
1364
+
1365
+ MAN_EXTENSION = .3
1366
+
1367
+ # If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1368
+ # then it will generate one additional man file for each entity
1369
+ # documented in the real man page(s). These additional files
1370
+ # only source the real man page, but without them the man command
1371
+ # would be unable to find the correct page. The default is NO.
1372
+
1373
+ MAN_LINKS = NO
1374
+
1375
+ #---------------------------------------------------------------------------
1376
+ # configuration options related to the XML output
1377
+ #---------------------------------------------------------------------------
1378
+
1379
+ # If the GENERATE_XML tag is set to YES Doxygen will
1380
+ # generate an XML file that captures the structure of
1381
+ # the code including all documentation.
1382
+
1383
+ GENERATE_XML = NO
1384
+
1385
+ # The XML_OUTPUT tag is used to specify where the XML pages will be put.
1386
+ # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1387
+ # put in front of it. If left blank `xml' will be used as the default path.
1388
+
1389
+ XML_OUTPUT = xml
1390
+
1391
+ # The XML_SCHEMA tag can be used to specify an XML schema,
1392
+ # which can be used by a validating XML parser to check the
1393
+ # syntax of the XML files.
1394
+
1395
+ XML_SCHEMA =
1396
+
1397
+ # The XML_DTD tag can be used to specify an XML DTD,
1398
+ # which can be used by a validating XML parser to check the
1399
+ # syntax of the XML files.
1400
+
1401
+ XML_DTD =
1402
+
1403
+ # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1404
+ # dump the program listings (including syntax highlighting
1405
+ # and cross-referencing information) to the XML output. Note that
1406
+ # enabling this will significantly increase the size of the XML output.
1407
+
1408
+ XML_PROGRAMLISTING = YES
1409
+
1410
+ #---------------------------------------------------------------------------
1411
+ # configuration options for the AutoGen Definitions output
1412
+ #---------------------------------------------------------------------------
1413
+
1414
+ # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1415
+ # generate an AutoGen Definitions (see autogen.sf.net) file
1416
+ # that captures the structure of the code including all
1417
+ # documentation. Note that this feature is still experimental
1418
+ # and incomplete at the moment.
1419
+
1420
+ GENERATE_AUTOGEN_DEF = NO
1421
+
1422
+ #---------------------------------------------------------------------------
1423
+ # configuration options related to the Perl module output
1424
+ #---------------------------------------------------------------------------
1425
+
1426
+ # If the GENERATE_PERLMOD tag is set to YES Doxygen will
1427
+ # generate a Perl module file that captures the structure of
1428
+ # the code including all documentation. Note that this
1429
+ # feature is still experimental and incomplete at the
1430
+ # moment.
1431
+
1432
+ GENERATE_PERLMOD = NO
1433
+
1434
+ # If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1435
+ # the necessary Makefile rules, Perl scripts and LaTeX code to be able
1436
+ # to generate PDF and DVI output from the Perl module output.
1437
+
1438
+ PERLMOD_LATEX = NO
1439
+
1440
+ # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1441
+ # nicely formatted so it can be parsed by a human reader.
1442
+ # This is useful
1443
+ # if you want to understand what is going on.
1444
+ # On the other hand, if this
1445
+ # tag is set to NO the size of the Perl module output will be much smaller
1446
+ # and Perl will parse it just the same.
1447
+
1448
+ PERLMOD_PRETTY = YES
1449
+
1450
+ # The names of the make variables in the generated doxyrules.make file
1451
+ # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1452
+ # This is useful so different doxyrules.make files included by the same
1453
+ # Makefile don't overwrite each other's variables.
1454
+
1455
+ PERLMOD_MAKEVAR_PREFIX =
1456
+
1457
+ #---------------------------------------------------------------------------
1458
+ # Configuration options related to the preprocessor
1459
+ #---------------------------------------------------------------------------
1460
+
1461
+ # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1462
+ # evaluate all C-preprocessor directives found in the sources and include
1463
+ # files.
1464
+
1465
+ ENABLE_PREPROCESSING = YES
1466
+
1467
+ # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1468
+ # names in the source code. If set to NO (the default) only conditional
1469
+ # compilation will be performed. Macro expansion can be done in a controlled
1470
+ # way by setting EXPAND_ONLY_PREDEF to YES.
1471
+
1472
+ MACRO_EXPANSION = NO
1473
+
1474
+ # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1475
+ # then the macro expansion is limited to the macros specified with the
1476
+ # PREDEFINED and EXPAND_AS_DEFINED tags.
1477
+
1478
+ EXPAND_ONLY_PREDEF = NO
1479
+
1480
+ # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
1481
+ # pointed to by INCLUDE_PATH will be searched when a #include is found.
1482
+
1483
+ SEARCH_INCLUDES = YES
1484
+
1485
+ # The INCLUDE_PATH tag can be used to specify one or more directories that
1486
+ # contain include files that are not input files but should be processed by
1487
+ # the preprocessor.
1488
+
1489
+ INCLUDE_PATH =
1490
+
1491
+ # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1492
+ # patterns (like *.h and *.hpp) to filter out the header-files in the
1493
+ # directories. If left blank, the patterns specified with FILE_PATTERNS will
1494
+ # be used.
1495
+
1496
+ INCLUDE_FILE_PATTERNS =
1497
+
1498
+ # The PREDEFINED tag can be used to specify one or more macro names that
1499
+ # are defined before the preprocessor is started (similar to the -D option of
1500
+ # gcc). The argument of the tag is a list of macros of the form: name
1501
+ # or name=definition (no spaces). If the definition and the = are
1502
+ # omitted =1 is assumed. To prevent a macro definition from being
1503
+ # undefined via #undef or recursively expanded use the := operator
1504
+ # instead of the = operator.
1505
+
1506
+ PREDEFINED =
1507
+
1508
+ # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1509
+ # this tag can be used to specify a list of macro names that should be expanded.
1510
+ # The macro definition that is found in the sources will be used.
1511
+ # Use the PREDEFINED tag if you want to use a different macro definition that
1512
+ # overrules the definition found in the source code.
1513
+
1514
+ EXPAND_AS_DEFINED =
1515
+
1516
+ # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1517
+ # doxygen's preprocessor will remove all references to function-like macros
1518
+ # that are alone on a line, have an all uppercase name, and do not end with a
1519
+ # semicolon, because these will confuse the parser if not removed.
1520
+
1521
+ SKIP_FUNCTION_MACROS = YES
1522
+
1523
+ #---------------------------------------------------------------------------
1524
+ # Configuration::additions related to external references
1525
+ #---------------------------------------------------------------------------
1526
+
1527
+ # The TAGFILES option can be used to specify one or more tagfiles.
1528
+ # Optionally an initial location of the external documentation
1529
+ # can be added for each tagfile. The format of a tag file without
1530
+ # this location is as follows:
1531
+ #
1532
+ # TAGFILES = file1 file2 ...
1533
+ # Adding location for the tag files is done as follows:
1534
+ #
1535
+ # TAGFILES = file1=loc1 "file2 = loc2" ...
1536
+ # where "loc1" and "loc2" can be relative or absolute paths or
1537
+ # URLs. If a location is present for each tag, the installdox tool
1538
+ # does not have to be run to correct the links.
1539
+ # Note that each tag file must have a unique name
1540
+ # (where the name does NOT include the path)
1541
+ # If a tag file is not located in the directory in which doxygen
1542
+ # is run, you must also specify the path to the tagfile here.
1543
+
1544
+ TAGFILES =
1545
+
1546
+ # When a file name is specified after GENERATE_TAGFILE, doxygen will create
1547
+ # a tag file that is based on the input files it reads.
1548
+
1549
+ GENERATE_TAGFILE =
1550
+
1551
+ # If the ALLEXTERNALS tag is set to YES all external classes will be listed
1552
+ # in the class index. If set to NO only the inherited external classes
1553
+ # will be listed.
1554
+
1555
+ ALLEXTERNALS = NO
1556
+
1557
+ # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1558
+ # in the modules index. If set to NO, only the current project's groups will
1559
+ # be listed.
1560
+
1561
+ EXTERNAL_GROUPS = YES
1562
+
1563
+ # The PERL_PATH should be the absolute path and name of the perl script
1564
+ # interpreter (i.e. the result of `which perl').
1565
+
1566
+ PERL_PATH = /usr/bin/perl
1567
+
1568
+ #---------------------------------------------------------------------------
1569
+ # Configuration options related to the dot tool
1570
+ #---------------------------------------------------------------------------
1571
+
1572
+ # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1573
+ # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1574
+ # or super classes. Setting the tag to NO turns the diagrams off. Note that
1575
+ # this option also works with HAVE_DOT disabled, but it is recommended to
1576
+ # install and use dot, since it yields more powerful graphs.
1577
+
1578
+ CLASS_DIAGRAMS = YES
1579
+
1580
+ # You can define message sequence charts within doxygen comments using the \msc
1581
+ # command. Doxygen will then run the mscgen tool (see
1582
+ # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1583
+ # documentation. The MSCGEN_PATH tag allows you to specify the directory where
1584
+ # the mscgen tool resides. If left empty the tool is assumed to be found in the
1585
+ # default search path.
1586
+
1587
+ MSCGEN_PATH =
1588
+
1589
+ # If set to YES, the inheritance and collaboration graphs will hide
1590
+ # inheritance and usage relations if the target is undocumented
1591
+ # or is not a class.
1592
+
1593
+ HIDE_UNDOC_RELATIONS = YES
1594
+
1595
+ # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1596
+ # available from the path. This tool is part of Graphviz, a graph visualization
1597
+ # toolkit from AT&T and Lucent Bell Labs. The other options in this section
1598
+ # have no effect if this option is set to NO (the default)
1599
+
1600
+ HAVE_DOT = YES
1601
+
1602
+ # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
1603
+ # allowed to run in parallel. When set to 0 (the default) doxygen will
1604
+ # base this on the number of processors available in the system. You can set it
1605
+ # explicitly to a value larger than 0 to get control over the balance
1606
+ # between CPU load and processing speed.
1607
+
1608
+ DOT_NUM_THREADS = 0
1609
+
1610
+ # By default doxygen will use the Helvetica font for all dot files that
1611
+ # doxygen generates. When you want a differently looking font you can specify
1612
+ # the font name using DOT_FONTNAME. You need to make sure dot is able to find
1613
+ # the font, which can be done by putting it in a standard location or by setting
1614
+ # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
1615
+ # directory containing the font.
1616
+
1617
+ DOT_FONTNAME = Helvetica
1618
+
1619
+ # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1620
+ # The default size is 10pt.
1621
+
1622
+ DOT_FONTSIZE = 10
1623
+
1624
+ # By default doxygen will tell dot to use the Helvetica font.
1625
+ # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
1626
+ # set the path where dot can find it.
1627
+
1628
+ DOT_FONTPATH =
1629
+
1630
+ # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1631
+ # will generate a graph for each documented class showing the direct and
1632
+ # indirect inheritance relations. Setting this tag to YES will force the
1633
+ # CLASS_DIAGRAMS tag to NO.
1634
+
1635
+ CLASS_GRAPH = YES
1636
+
1637
+ # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1638
+ # will generate a graph for each documented class showing the direct and
1639
+ # indirect implementation dependencies (inheritance, containment, and
1640
+ # class references variables) of the class with other documented classes.
1641
+
1642
+ COLLABORATION_GRAPH = YES
1643
+
1644
+ # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1645
+ # will generate a graph for groups, showing the direct groups dependencies
1646
+
1647
+ GROUP_GRAPHS = YES
1648
+
1649
+ # If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1650
+ # collaboration diagrams in a style similar to the OMG's Unified Modeling
1651
+ # Language.
1652
+
1653
+ UML_LOOK = NO
1654
+
1655
+ # If set to YES, the inheritance and collaboration graphs will show the
1656
+ # relations between templates and their instances.
1657
+
1658
+ TEMPLATE_RELATIONS = YES
1659
+
1660
+ # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1661
+ # tags are set to YES then doxygen will generate a graph for each documented
1662
+ # file showing the direct and indirect include dependencies of the file with
1663
+ # other documented files.
1664
+
1665
+ INCLUDE_GRAPH = YES
1666
+
1667
+ # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1668
+ # HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1669
+ # documented header file showing the documented files that directly or
1670
+ # indirectly include this file.
1671
+
1672
+ INCLUDED_BY_GRAPH = YES
1673
+
1674
+ # If the CALL_GRAPH and HAVE_DOT options are set to YES then
1675
+ # doxygen will generate a call dependency graph for every global function
1676
+ # or class method. Note that enabling this option will significantly increase
1677
+ # the time of a run. So in most cases it will be better to enable call graphs
1678
+ # for selected functions only using the \callgraph command.
1679
+
1680
+ CALL_GRAPH = NO
1681
+
1682
+ # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1683
+ # doxygen will generate a caller dependency graph for every global function
1684
+ # or class method. Note that enabling this option will significantly increase
1685
+ # the time of a run. So in most cases it will be better to enable caller
1686
+ # graphs for selected functions only using the \callergraph command.
1687
+
1688
+ CALLER_GRAPH = NO
1689
+
1690
+ # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1691
+ # will generate a graphical hierarchy of all classes instead of a textual one.
1692
+
1693
+ GRAPHICAL_HIERARCHY = YES
1694
+
1695
+ # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1696
+ # then doxygen will show the dependencies a directory has on other directories
1697
+ # in a graphical way. The dependency relations are determined by the #include
1698
+ # relations between the files in the directories.
1699
+
1700
+ DIRECTORY_GRAPH = YES
1701
+
1702
+ # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1703
+ # generated by dot. Possible values are svg, png, jpg, or gif.
1704
+ # If left blank png will be used. If you choose svg you need to set
1705
+ # HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1706
+ # visible in IE 9+ (other browsers do not have this requirement).
1707
+
1708
+ DOT_IMAGE_FORMAT = png
1709
+
1710
+ # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
1711
+ # enable generation of interactive SVG images that allow zooming and panning.
1712
+ # Note that this requires a modern browser other than Internet Explorer.
1713
+ # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
1714
+ # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1715
+ # visible. Older versions of IE do not have SVG support.
1716
+
1717
+ INTERACTIVE_SVG = NO
1718
+
1719
+ # The tag DOT_PATH can be used to specify the path where the dot tool can be
1720
+ # found. If left blank, it is assumed the dot tool can be found in the path.
1721
+
1722
+ DOT_PATH =
1723
+
1724
+ # The DOTFILE_DIRS tag can be used to specify one or more directories that
1725
+ # contain dot files that are included in the documentation (see the
1726
+ # \dotfile command).
1727
+
1728
+ DOTFILE_DIRS =
1729
+
1730
+ # The MSCFILE_DIRS tag can be used to specify one or more directories that
1731
+ # contain msc files that are included in the documentation (see the
1732
+ # \mscfile command).
1733
+
1734
+ MSCFILE_DIRS =
1735
+
1736
+ # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1737
+ # nodes that will be shown in the graph. If the number of nodes in a graph
1738
+ # becomes larger than this value, doxygen will truncate the graph, which is
1739
+ # visualized by representing a node as a red box. Note that if the
1740
+ # number of direct children of the root node in a graph is already larger than
1741
+ # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1742
+ # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1743
+
1744
+ DOT_GRAPH_MAX_NODES = 50
1745
+
1746
+ # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1747
+ # graphs generated by dot. A depth value of 3 means that only nodes reachable
1748
+ # from the root by following a path via at most 3 edges will be shown. Nodes
1749
+ # that lay further from the root node will be omitted. Note that setting this
1750
+ # option to 1 or 2 may greatly reduce the computation time needed for large
1751
+ # code bases. Also note that the size of a graph can be further restricted by
1752
+ # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1753
+
1754
+ MAX_DOT_GRAPH_DEPTH = 0
1755
+
1756
+ # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1757
+ # background. This is disabled by default, because dot on Windows does not
1758
+ # seem to support this out of the box. Warning: Depending on the platform used,
1759
+ # enabling this option may lead to badly anti-aliased labels on the edges of
1760
+ # a graph (i.e. they become hard to read).
1761
+
1762
+ DOT_TRANSPARENT = NO
1763
+
1764
+ # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
1765
+ # files in one run (i.e. multiple -o and -T options on the command line). This
1766
+ # makes dot run faster, but since only newer versions of dot (>1.8.10)
1767
+ # support this, this feature is disabled by default.
1768
+
1769
+ DOT_MULTI_TARGETS = YES
1770
+
1771
+ # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1772
+ # generate a legend page explaining the meaning of the various boxes and
1773
+ # arrows in the dot generated graphs.
1774
+
1775
+ GENERATE_LEGEND = YES
1776
+
1777
+ # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1778
+ # remove the intermediate dot files that are used to generate
1779
+ # the various graphs.
1780
+
1781
+ DOT_CLEANUP = YES
mosesdecoder/moses-cmd/Jamfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
2
+
3
+ exe moses : Main.cpp deps ;
4
+ exe vwtrainer : MainVW.cpp deps ;
5
+ exe lmbrgrid : LatticeMBRGrid.cpp deps ;
6
+ alias programs : moses lmbrgrid vwtrainer ;
7
+
mosesdecoder/moses-cmd/LatticeMBRGrid.cpp ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (c) 2010 University of Edinburgh
6
+ All rights reserved.
7
+
8
+ Redistribution and use in source and binary forms, with or without modification,
9
+ are permitted provided that the following conditions are met:
10
+
11
+ * Redistributions of source code must retain the above copyright notice,
12
+ this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above copyright notice,
14
+ this list of conditions and the following disclaimer in the documentation
15
+ and/or other materials provided with the distribution.
16
+ * Neither the name of the University of Edinburgh nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
24
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ POSSIBILITY OF SUCH DAMAGE.
31
+ ***********************************************************************/
32
+ /**
33
+ * Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
34
+ See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation by Tromble, Kumar, Och and Macherey,
35
+ EMNLP 2008 for details of the parameters.
36
+
37
+ The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
38
+ -lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
39
+ parameters are missing, then they are set to their default values. Output is of the form:
40
+ sentence-id ||| p r prune scale ||| translation-hypothesis
41
+ **/
42
+
43
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>

#include <boost/foreach.hpp>

#include "moses/IOWrapper.h"
#include "moses/LatticeMBR.h"
#include "moses/Manager.h"
#include "moses/StaticData.h"
#include "moses/Timer.h"
#include "moses/TranslationTask.h"
#include "util/exception.hh"
58
+
59
+ using namespace std;
60
+ using namespace Moses;
61
+
62
+ //keys
63
+ enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
64
+
65
+ namespace Moses
66
+ {
67
+
68
+ class Grid
69
+ {
70
+ public:
71
+ /** Add a parameter with key, command line argument, and default value */
72
+ void addParam(gridkey key, const string& arg, float defaultValue) {
73
+ m_args[arg] = key;
74
+ UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(),
75
+ "Couldn't find value for key " << (int) key);
76
+ m_grid[key].push_back(defaultValue);
77
+ }
78
+
79
+ /** Parse the arguments, removing those that define the grid and returning a copy of the rest */
80
+ void parseArgs(int& argc, char const**& argv) {
81
+ char const** newargv = new char const*[argc+1]; //Space to add mbr parameter
82
+ int newargc = 0;
83
+ for (int i = 0; i < argc; ++i) {
84
+ bool consumed = false;
85
+ for (map<string,gridkey>::const_iterator argi = m_args.begin(); argi != m_args.end(); ++argi) {
86
+ if (!strcmp(argv[i], argi->first.c_str())) {
87
+ ++i;
88
+ if (i >= argc) {
89
+ cerr << "Error: missing parameter for " << argi->first << endl;
90
+ throw runtime_error("Missing parameter");
91
+ } else {
92
+ string value = argv[i];
93
+ gridkey key = argi->second;
94
+ if (m_grid[key].size() != 1) {
95
+ throw runtime_error("Duplicate grid argument");
96
+ }
97
+ m_grid[key].clear();
98
+ char delim = ',';
99
+ string::size_type lastpos = value.find_first_not_of(delim);
100
+ string::size_type pos = value.find_first_of(delim,lastpos);
101
+ while (string::npos != pos || string::npos != lastpos) {
102
+ float param = atof(value.substr(lastpos, pos-lastpos).c_str());
103
+ if (!param) {
104
+ cerr << "Error: Illegal grid parameter for " << argi->first << endl;
105
+ throw runtime_error("Illegal grid parameter");
106
+ }
107
+ m_grid[key].push_back(param);
108
+ lastpos = value.find_first_not_of(delim,pos);
109
+ pos = value.find_first_of(delim,lastpos);
110
+ }
111
+ consumed = true;
112
+ }
113
+ if (consumed) break;
114
+ }
115
+ }
116
+ if (!consumed) {
117
+ // newargv[newargc] = new char[strlen(argv[i]) + 1];
118
+ // strcpy(newargv[newargc],argv[i]);
119
+ newargv[newargc] = argv[i];
120
+ ++newargc;
121
+ }
122
+ }
123
+ argc = newargc;
124
+ argv = newargv;
125
+ }
126
+
127
+ /** Get the grid for a particular key.*/
128
+ const vector<float>& getGrid(gridkey key) const {
129
+ map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
130
+ assert (iter != m_grid.end());
131
+ return iter->second;
132
+
133
+ }
134
+
135
+ private:
136
+ map<gridkey,vector<float> > m_grid;
137
+ map<string,gridkey> m_args;
138
+ };
139
+
140
+ } // namespace
141
+
142
+ int main(int argc, char const* argv[])
143
+ {
144
+ cerr << "Lattice MBR Grid search" << endl;
145
+
146
+ Grid grid;
147
+ grid.addParam(lmbr_p, "-lmbr-p", 0.5);
148
+ grid.addParam(lmbr_r, "-lmbr-r", 0.5);
149
+ grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
150
+ grid.addParam(lmbr_scale, "-mbr-scale",1.0);
151
+
152
+ grid.parseArgs(argc,argv);
153
+
154
+ Parameter* params = new Parameter();
155
+ if (!params->LoadParam(argc,argv)) {
156
+ params->Explain();
157
+ exit(1);
158
+ }
159
+
160
+ ResetUserTime();
161
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
162
+ exit(1);
163
+ }
164
+
165
+ StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
166
+ boost::shared_ptr<AllOptions> opts(new AllOptions(*SD.options()));
167
+ LMBR_Options& lmbr = opts->lmbr;
168
+ MBR_Options& mbr = opts->mbr;
169
+ lmbr.enabled = true;
170
+
171
+ boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
172
+ if (!ioWrapper) {
173
+ throw runtime_error("Failed to initialise IOWrapper");
174
+ }
175
+ size_t nBestSize = mbr.size;
176
+
177
+ if (nBestSize <= 0) {
178
+ throw new runtime_error("Non-positive size specified for n-best list");
179
+ }
180
+
181
+ const vector<float>& pgrid = grid.getGrid(lmbr_p);
182
+ const vector<float>& rgrid = grid.getGrid(lmbr_r);
183
+ const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
184
+ const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
185
+
186
+ boost::shared_ptr<InputType> source;
187
+ while((source = ioWrapper->ReadInput()) != NULL) {
188
+ // set up task of translating one sentence
189
+ boost::shared_ptr<TranslationTask> ttask;
190
+ ttask = TranslationTask::create(source, ioWrapper);
191
+ Manager manager(ttask);
192
+ manager.Decode();
193
+ TrellisPathList nBestList;
194
+ manager.CalcNBest(nBestSize, nBestList,true);
195
+ //grid search
196
+ BOOST_FOREACH(float const& p, pgrid) {
197
+ lmbr.precision = p;
198
+ BOOST_FOREACH(float const& r, rgrid) {
199
+ lmbr.ratio = r;
200
+ BOOST_FOREACH(size_t const prune_i, prune_grid) {
201
+ lmbr.pruning_factor = prune_i;
202
+ BOOST_FOREACH(float const& scale_i, scale_grid) {
203
+ mbr.scale = scale_i;
204
+ size_t lineCount = source->GetTranslationId();
205
+ cout << lineCount << " ||| " << p << " "
206
+ << r << " " << size_t(prune_i) << " " << scale_i
207
+ << " ||| ";
208
+ vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
209
+ manager.OutputBestHypo(mbrBestHypo, cout);
210
+ }
211
+ }
212
+ }
213
+ }
214
+ }
215
+ }
mosesdecoder/moses-cmd/Main.cpp ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2009 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ /**
23
+ * Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main.
24
+ **/
25
+ #include "moses/ExportInterface.h"
26
+ #include "util/string_stream.hh"
27
+
28
/** main function of the command line version of the decoder **/
int main(int argc, char const** argv)
{
  // Thin wrapper: all real work (option parsing, model loading, decoding
  // loop, threading) happens in decoder_main, declared in
  // moses/ExportInterface.h, so that library embedders share one code path.
  return decoder_main(argc, argv);
}
33
+
mosesdecoder/moses-cmd/MainVW.cpp ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2009 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ /**
23
+ * Moses main, for single-threaded and multi-threaded.
24
+ **/
25
+ #include <exception>
26
+ #include <fstream>
27
+ #include <sstream>
28
+ #include <vector>
29
+
30
+ #include "util/usage.hh"
31
+
32
+ #ifdef WIN32
33
+ // Include Visual Leak Detector
34
+ //#include <vld.h>
35
+ #endif
36
+
37
+ #include "moses/IOWrapper.h"
38
+ #include "moses/Hypothesis.h"
39
+ #include "moses/Manager.h"
40
+ #include "moses/StaticData.h"
41
+ #include "moses/TypeDef.h"
42
+ #include "moses/Util.h"
43
+ #include "moses/Timer.h"
44
+ #include "moses/TranslationModel/PhraseDictionary.h"
45
+ #include "moses/FF/StatefulFeatureFunction.h"
46
+ #include "moses/FF/StatelessFeatureFunction.h"
47
+ #include "moses/TrainingTask.h"
48
+ #include "util/random.hh"
49
+
50
+ #ifdef HAVE_PROTOBUF
51
+ #include "hypergraph.pb.h"
52
+ #endif
53
+
54
+ using namespace std;
55
+ using namespace Moses;
56
+
57
+ namespace Moses
58
+ {
59
+
60
// Write the current global feature weights to the given stream in
// fixed-point notation, as used by the search-graph/hypergraph output.
void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
{
  // Fixed notation with 6 decimals keeps the weight output stable and
  // locale-independent across runs.
  outputSearchGraphStream.setf(std::ios::fixed);
  outputSearchGraphStream.precision(6);
  StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
}
66
+
67
+
68
+ } //namespace
69
+
70
+ /** main function of the command line version of the decoder **/
71
/** main function of the VW-trainer build of the decoder: instead of
 *  decoding, each input sentence is wrapped in a TrainingTask (built as
 *  the `vwtrainer` executable per the Jamfile — confirm against
 *  TrainingTask for what the task actually computes). **/
int main(int argc, char const** argv)
{
  //setting in the Staticdata a link between the thread id of this process and a NULL tasksptr
  // StaticData::InstanceNonConst().SetTask(); // => moved into StaticData constructor

  try {

#ifdef HAVE_PROTOBUF
    GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif

    // echo command line, if verbose
    IFVERBOSE(1) {
      TRACE_ERR("command: ");
      for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
      TRACE_ERR(endl);
    }

    // set number of significant decimals in output
    FixPrecision(cout);
    FixPrecision(cerr);

    // load all the settings into the Parameter class
    // (stores them as strings, or array of strings)
    Parameter params;
    if (!params.LoadParam(argc,argv)) {
      exit(1);
    }


    // initialize all "global" variables, which are stored in StaticData
    // note: this also loads models such as the language model, etc.
    ResetUserTime();
    if (!StaticData::LoadDataStatic(&params, argv[0])) {
      exit(1);
    }

    // setting "-show-weights" -> just dump out weights and exit
    if (params.isParamSpecified("show-weights")) {
      ShowWeights();
      exit(0);
    }

    // shorthand for accessing information in StaticData
    const StaticData& staticData = StaticData::Instance();


    //initialise random numbers
    util::rand_init();

    // set up read/writing class
    IFVERBOSE(1) {
      PrintUserTime("Created input-output object");
    }
    AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options()));
    boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
    if (ioWrapper == NULL) {
      cerr << "Error; Failed to create IO object" << endl;
      exit(1);
    }

    // check on weights
    const ScoreComponentCollection& weights = staticData.GetAllWeights();
    IFVERBOSE(2) {
      TRACE_ERR("The global weight vector looks like this: ");
      TRACE_ERR(weights);
      TRACE_ERR("\n");
    }

#ifdef WITH_THREADS
#pragma message ("Compiling with Threads.")
    ThreadPool pool(staticData.ThreadCount());
#endif

    // main loop over set of input sentences

    // The ContextScope is shared across all tasks of this run.
    boost::shared_ptr<ContextScope> scope(new ContextScope);
    boost::shared_ptr<InputType> source;
    while ((source = ioWrapper->ReadInput()) != NULL) {
      IFVERBOSE(1) {
        ResetUserTime();
      }

      // set up task of training one sentence
      boost::shared_ptr<TrainingTask> task;
      task = TrainingTask::create(source, ioWrapper, scope);

      // execute task
#ifdef WITH_THREADS
      pool.Submit(task);
#else
      task->Run();
#endif
    }

    // we are done, finishing up
#ifdef WITH_THREADS
    pool.Stop(true); //flush remaining jobs
#endif

    FeatureFunction::Destroy();

  } catch (const std::exception &e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  IFVERBOSE(1) util::PrintUsage(std::cerr);

#ifndef EXIT_RETURN
  //This avoids that destructors are called (it can take a long time)
  exit(EXIT_SUCCESS);
#else
  return EXIT_SUCCESS;
#endif
}
mosesdecoder/moses-cmd/MainVW.h ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ // $Id$
3
+
4
+ /***********************************************************************
5
+ Moses - factored phrase-based language decoder
6
+ Copyright (c) 2006 University of Edinburgh
7
+ All rights reserved.
8
+
9
+ Redistribution and use in source and binary forms, with or without modification,
10
+ are permitted provided that the following conditions are met:
11
+
12
+ * Redistributions of source code must retain the above copyright notice,
13
+ this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above copyright notice,
15
+ this list of conditions and the following disclaimer in the documentation
16
+ and/or other materials provided with the distribution.
17
+ * Neither the name of the University of Edinburgh nor the names of its contributors
18
+ may be used to endorse or promote products derived from this software
19
+ without specific prior written permission.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
25
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
29
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
+ POSSIBILITY OF SUCH DAMAGE.
32
+ ***********************************************************************/
33
+
34
+ // example file on how to use moses library
35
+
36
+
37
+ #include "moses/StaticData.h"
38
+
39
+ class IOWrapper;
40
+
41
+ int main(int argc, char* argv[]);
42
+
mosesdecoder/moses2/AlignmentInfo.h ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #pragma once
21
+
22
+ #include <iostream>
23
+ #include <ostream>
24
+ #include <set>
25
+ #include <vector>
26
+ #include <cstdlib>
27
+
28
+ #include <boost/functional/hash.hpp>
29
+ #include "TypeDef.h"
30
+
31
+ namespace Moses2
32
+ {
33
+
34
+ class AlignmentInfoCollection;
35
+ class System;
36
+
37
+ /** Collection of non-terminal alignment pairs, ordered by source index.
38
+ * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
39
+ */
40
class AlignmentInfo
{
  friend struct AlignmentInfoOrderer;
  friend struct AlignmentInfoHasher;
  friend class AlignmentInfoCollection;
  friend class VW;

  friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);

public:
  // Set of (source position, target position) pairs, ordered by source index.
  typedef std::set<std::pair<size_t,size_t> > CollType;
  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;

  const_iterator begin() const {
    return m_collection.begin();
  }
  const_iterator end() const {
    return m_collection.end();
  }

  // Insert one (sourcePos, targetPos) alignment point; duplicates are
  // absorbed by the underlying std::set.
  void Add(size_t sourcePos, size_t targetPos) {
    m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
  }
  /** Provides a map from target-side to source-side non-terminal indices.
   * The target-side index should be the rule symbol index (COUNTING terminals).
   * The index returned is the rule non-terminal index (IGNORING terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap() const {
    return m_nonTermIndexMap;
  }

  /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
   * the index counting both terminals and non-terminals) */
  const NonTermIndexMap &GetNonTermIndexMap2() const {
    return m_nonTermIndexMap2;
  }

  // Direct read access to the full set of alignment points.
  const CollType &GetAlignments() const {
    return m_collection;
  }

  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;

  // Number of alignment points.
  size_t GetSize() const {
    return m_collection.size();
  }

  std::vector< const std::pair<size_t,size_t>* >
  GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;

  std::vector<size_t> GetSourceIndex2PosMap() const;

  // NOTE(review): equality compares the alignment set and the first
  // non-term index map only; m_nonTermIndexMap2 is not compared —
  // presumably it is derived from the same data. Confirm before relying
  // on it for objects built through different constructors.
  bool operator==(const AlignmentInfo& rhs) const {
    return m_collection == rhs.m_collection &&
           m_nonTermIndexMap == rhs.m_nonTermIndexMap;
  }

  std::string Debug(const System &system) const;

private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
  explicit AlignmentInfo(const std::vector<unsigned char> &aln);

  // used only by VW to load word alignment between sentences
  explicit AlignmentInfo(const std::string &str);

  // Populates m_nonTermIndexMap and m_nonTermIndexMap2 from m_collection.
  void BuildNonTermIndexMaps();

  CollType m_collection;            // the alignment points themselves
  NonTermIndexMap m_nonTermIndexMap;   // target index -> non-term index
  NonTermIndexMap m_nonTermIndexMap2;  // target index -> symbol index
};
115
+
116
+ /** Define an arbitrary strict weak ordering between AlignmentInfo objects
117
+ * for use by AlignmentInfoCollection.
118
+ */
119
+ struct AlignmentInfoOrderer {
120
+ bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
121
+ if (a.m_collection == b.m_collection) {
122
+ return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
123
+ } else {
124
+ return a.m_collection < b.m_collection;
125
+ }
126
+ }
127
+ };
128
+
129
+ /**
130
+ * Hashing functoid
131
+ **/
132
/**
 * Hashing functoid: combines the alignment set and the non-terminal index
 * map — the same two fields AlignmentInfo::operator== compares.
 **/
struct AlignmentInfoHasher {
  size_t operator()(const AlignmentInfo& a) const {
    size_t h = 0;
    boost::hash_combine(h, a.m_collection);
    boost::hash_combine(h, a.m_nonTermIndexMap);
    return h;
  }

};
141
+
142
// ADL hook so boost::hash picks up AlignmentInfo automatically.
// The hasher is stateless, so a temporary is equivalent to the previous
// function-local static instance.
inline size_t hash_value(const AlignmentInfo& a)
{
  return AlignmentInfoHasher()(a);
}
147
+
148
+ }
mosesdecoder/moses2/AlignmentInfoCollection.cpp ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /***********************************************************************
2
+ Moses - statistical machine translation system
3
+ Copyright (C) 2006-2011 University of Edinburgh
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ***********************************************************************/
19
+
20
+ #include "AlignmentInfoCollection.h"
21
+
22
+ using namespace std;
23
+
24
+ namespace Moses2
25
+ {
26
+
27
// Process-wide singleton instance, constructed at static-initialisation time.
AlignmentInfoCollection AlignmentInfoCollection::s_instance;

AlignmentInfoCollection::AlignmentInfoCollection()
{
  // Pre-insert the empty alignment so GetEmptyAlignmentInfo() can hand out
  // one canonical shared instance. (The Add(set) overload used here is
  // declared in the header alongside the Add(AlignmentInfo) overload.)
  std::set<std::pair<size_t,size_t> > pairs;
  m_emptyAlignmentInfo = Add(pairs);
}
34
+
35
// Nothing to release explicitly: the collection stores AlignmentInfo
// objects by value in its set.
AlignmentInfoCollection::~AlignmentInfoCollection()
{}
37
+
38
// Return the canonical empty alignment pre-created by the constructor;
// safe to call without locking because the pointer never changes after
// construction.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  return *m_emptyAlignmentInfo;
}
42
+
43
/** Intern an AlignmentInfo: return a pointer to the unique stored copy,
 *  inserting it first if it is new. Pointers remain valid for the lifetime
 *  of the collection (std::set never relocates its elements). */
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
  {
    // Fast path: most alignments already exist, so probe under a shared
    // (read) lock first.
    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
    AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
    if (i != m_collection.end())
      return &*i;
  }
  // NOTE(review): the read lock is dropped before the exclusive lock is
  // taken, so two threads can reach the insert below with the same value;
  // that race is benign — the second insert is a no-op and both callers
  // get the same element back.
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
  std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
  return &(*ret.first);
}
59
+
60
+
61
+
62
+ }
mosesdecoder/moses2/ArcLists.cpp ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * ArcList.cpp
3
+ *
4
+ * Created on: 26 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <iostream>
8
+ #include <sstream>
9
+ #include <algorithm>
10
+ #include <boost/foreach.hpp>
11
+ #include "ArcLists.h"
12
+ #include "HypothesisBase.h"
13
+ #include "util/exception.hh"
14
+
15
+ using namespace std;
16
+
17
+ namespace Moses2
18
+ {
19
+
20
ArcLists::ArcLists()
{
  // Nothing to initialise: the hypothesis -> arc-list map starts empty.
}
25
+
26
+ ArcLists::~ArcLists()
27
+ {
28
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
29
+ const ArcList *arcList = collPair.second;
30
+ delete arcList;
31
+ }
32
+ }
33
+
34
/** Record the outcome of a recombination attempt.
 *  added == true : currHypo was inserted into the stack (it won). If it
 *      displaced an existing hypo (otherHypo != NULL), the loser's arc list
 *      is detached and transferred to the winner; otherwise a fresh list is
 *      created. currHypo is then mapped to that list.
 *  added == false: currHypo lost against otherHypo and is appended to the
 *      winner's existing list.
 *  In both cases currHypo itself ends up on the list that is attached to
 *  the surviving stack entry.
 */
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
                      const HypothesisBase *otherHypo)
{
  //cerr << added << " " << currHypo << " " << otherHypo << endl;
  ArcList *arcList;
  if (added) {
    // we're winners!
    if (otherHypo) {
      // there was a existing losing hypo
      arcList = &GetAndDetachArcList(otherHypo);
    } else {
      // there was no existing hypo
      arcList = new ArcList;
    }
    m_coll[currHypo] = arcList;
  } else {
    // we're losers!
    // there should be a winner, we're not doing beam pruning
    UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
    arcList = &GetArcList(otherHypo);
  }

  // in any case, add the curr hypo
  arcList->push_back(currHypo);
}
59
+
60
+ ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
61
+ {
62
+ Coll::iterator iter = m_coll.find(hypo);
63
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
64
+ ArcList &arcList = *iter->second;
65
+ return arcList;
66
+ }
67
+
68
+ const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
69
+ {
70
+ Coll::const_iterator iter = m_coll.find(hypo);
71
+
72
+ if (iter == m_coll.end()) {
73
+ cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
74
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
75
+ const HypothesisBase *hypo = collPair.first;
76
+ cerr << hypo << " ";
77
+ }
78
+ }
79
+
80
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
81
+ ArcList &arcList = *iter->second;
82
+ return arcList;
83
+ }
84
+
85
+ ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
86
+ {
87
+ Coll::iterator iter = m_coll.find(hypo);
88
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
89
+ ArcList &arcList = *iter->second;
90
+
91
+ m_coll.erase(iter);
92
+
93
+ return arcList;
94
+ }
95
+
96
+ void ArcLists::Sort()
97
+ {
98
+ BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
99
+ ArcList &list = *collPair.second;
100
+ std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
101
+ }
102
+ }
103
+
104
+ void ArcLists::Delete(const HypothesisBase *hypo)
105
+ {
106
+ //cerr << "hypo=" << hypo->Debug() << endl;
107
+ //cerr << "m_coll=" << m_coll.size() << endl;
108
+ Coll::iterator iter = m_coll.find(hypo);
109
+ UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
110
+ ArcList *arcList = iter->second;
111
+
112
+ m_coll.erase(iter);
113
+ delete arcList;
114
+ }
115
+
116
+ std::string ArcLists::Debug(const System &system) const
117
+ {
118
+ stringstream strm;
119
+ BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
120
+ const ArcList *arcList = collPair.second;
121
+ strm << arcList << "(" << arcList->size() << ") ";
122
+ }
123
+ return strm.str();
124
+ }
125
+
126
+ }
127
+
mosesdecoder/moses2/Array.h ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
#include <cassert>
#include <cstring>
#include <boost/functional/hash.hpp>
#include "MemPool.h"
5
+
6
+ namespace Moses2
7
+ {
8
+
9
/** Fixed-capacity array whose storage lives in a MemPool; the logical size
 *  can be shrunk/grown via resize() but never beyond the capacity chosen at
 *  construction. The pool owns the storage, so there is no destructor. */
template<typename T>
class Array
{
public:
  typedef T* iterator;
  typedef const T* const_iterator;
  //! iterators over the logical (used) portion of the array
  const_iterator begin() const {
    return m_arr;
  }
  const_iterator end() const {
    return m_arr + m_size;
  }

  iterator begin() {
    return m_arr;
  }
  iterator end() {
    return m_arr + m_size;
  }

  /** Allocate `size` elements from `pool` and initialise each to `val`.
   *  `size` is also the maximum size for any later resize(). */
  Array(MemPool &pool, size_t size = 0, const T &val = T()) {
    m_size = size;
    m_maxSize = size;
    m_arr = pool.Allocate<T>(size);
    for (size_t i = 0; i < size; ++i) {
      m_arr[i] = val;
    }
  }

  size_t size() const {
    return m_size;
  }

  const T& operator[](size_t ind) const {
    assert(ind < m_size);
    return m_arr[ind];
  }

  T& operator[](size_t ind) {
    assert(ind < m_size);
    return m_arr[ind];
  }

  T *GetArray() {
    return m_arr;
  }

  // Hash over the used elements only.
  size_t hash() const {
    size_t seed = 0;
    for (size_t i = 0; i < m_size; ++i) {
      boost::hash_combine(seed, m_arr[i]);
    }
    return seed;
  }

  /** Byte-wise comparison of the used elements — only meaningful for
   *  trivially-copyable T.
   *  NOTE(review): reads m_size elements from `compare` without checking
   *  compare.m_size; callers are expected to compare equal-sized arrays —
   *  confirm at the call sites. */
  int Compare(const Array &compare) const {
    int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
    return cmp;
  }

  bool operator==(const Array &compare) const {
    return Compare(compare) == 0;
  }

  /** Change the logical size within the original capacity.
   *  BUGFIX: assert the NEW size against the capacity — the old code
   *  asserted `m_size <= m_maxSize` (the current size), which is trivially
   *  true and never caught an out-of-range grow. */
  void resize(size_t newSize) {
    assert(newSize <= m_maxSize);
    m_size = newSize;
  }
protected:
  size_t m_size, m_maxSize; // logical size / allocated capacity
  T *m_arr;                 // storage owned by the MemPool
};
84
+
85
+ }
mosesdecoder/moses2/EstimatedScores.h ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #pragma once
23
+
24
+ #include <iostream>
25
+ #include "legacy/Util2.h"
26
+ #include "legacy/Bitmap.h"
27
+ #include "legacy/Matrix.h"
28
+
29
+ namespace Moses2
30
+ {
31
+ class MemPool;
32
+ class System;
33
+
34
//! A square array of floats to store future costs in the phrase-based decoder
class EstimatedScores: public Matrix<float>
{
public:
  // Pool-allocated size x size matrix (one cell per source span).
  EstimatedScores(MemPool &pool, size_t size) :
    Matrix<float>(pool, size, size) {
  }

  ~EstimatedScores(); // not implemented

  // Estimated score for the words not yet covered by the bitmap
  // (presumably; implemented in the .cpp - confirm there).
  float CalcEstimatedScore(Bitmap const&) const;
  // As above, but with [startPos, endPos] treated as additionally covered.
  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;

  // Dump the whole matrix, one row per endPos, space-separated.
  std::ostream &Debug(std::ostream &out, const System &system) const {
    for (size_t endPos = 0; endPos < GetSize(); endPos++) {
      for (size_t startPos = 0; startPos < GetSize(); startPos++)
        out << GetValue(startPos, endPos) << " ";
      out << std::endl;
    }
    return out;
  }

};
57
+
58
+ }
59
+
mosesdecoder/moses2/HypothesisBase.cpp ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Hypothesis.cpp
3
+ *
4
+ * Created on: 24 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #include <boost/foreach.hpp>
9
+ #include <stdlib.h>
10
+ #include <deque>
11
+ #include "HypothesisBase.h"
12
+ #include "System.h"
13
+ #include "Scores.h"
14
+ #include "ManagerBase.h"
15
+ #include "MemPool.h"
16
+ #include "FF/StatefulFeatureFunction.h"
17
+
18
+ using namespace std;
19
+
20
+ namespace Moses2
21
+ {
22
+
23
+ //size_t g_numHypos = 0;
24
+
25
+ HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
26
+ {
27
+ m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
28
+ system.featureFunctions.GetNumScores());
29
+
30
+ // FF states
31
+ const std::vector<const StatefulFeatureFunction*> &sfffs =
32
+ system.featureFunctions.GetStatefulFeatureFunctions();
33
+ size_t numStatefulFFs = sfffs.size();
34
+ m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);
35
+
36
+ BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
37
+ size_t statefulInd = sfff->GetStatefulInd();
38
+ FFState *state = sfff->BlankState(pool, system);
39
+ m_ffStates[statefulInd] = state;
40
+ }
41
+ }
42
+
43
+ size_t HypothesisBase::hash() const
44
+ {
45
+ return hash(0);
46
+ }
47
+
48
+ size_t HypothesisBase::hash(size_t seed) const
49
+ {
50
+ size_t numStatefulFFs =
51
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
52
+
53
+ // states
54
+ for (size_t i = 0; i < numStatefulFFs; ++i) {
55
+ const FFState *state = m_ffStates[i];
56
+ size_t hash = state->hash();
57
+ boost::hash_combine(seed, hash);
58
+ }
59
+ return seed;
60
+
61
+ }
62
+
63
+ bool HypothesisBase::operator==(const HypothesisBase &other) const
64
+ {
65
+ size_t numStatefulFFs =
66
+ GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
67
+
68
+ // states
69
+ for (size_t i = 0; i < numStatefulFFs; ++i) {
70
+ const FFState &thisState = *m_ffStates[i];
71
+ const FFState &otherState = *other.m_ffStates[i];
72
+ if (thisState != otherState) {
73
+ return false;
74
+ }
75
+ }
76
+ return true;
77
+
78
+ }
79
+
80
+ }
81
+
mosesdecoder/moses2/HypothesisBase.h ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Hypothesis.h
3
+ *
4
+ * Created on: 24 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+
9
+ #include <iostream>
10
+ #include <cstddef>
11
+ #include "FF/FFState.h"
12
+ #include "Scores.h"
13
+
14
+ namespace Moses2
15
+ {
16
+
17
+ class ManagerBase;
18
+ class Scores;
19
+
20
// Base class for phrase-based and SCFG hypotheses. Holds the score vector
// and the per-feature FF states; hash()/operator== over those states define
// recombination equivalence. Instances are pool-allocated and recycled;
// destructors are effectively never relied upon.
class HypothesisBase
{
public:
  virtual ~HypothesisBase() {
  }

  // Manager that owns this hypothesis.
  inline ManagerBase &GetManager() const {
    return *m_mgr;
  }

  // Downcast to a concrete hypothesis type; the caller guarantees T.
  template<typename T>
  const T &Cast() const {
    return static_cast<const T&>(*this);
  }

  const Scores &GetScores() const {
    return *m_scores;
  }
  Scores &GetScores() {
    return *m_scores;
  }

  // State of the stateful feature function with stateful index 'ind'.
  const FFState *GetState(size_t ind) const {
    return m_ffStates[ind];
  }
  FFState *GetState(size_t ind) {
    return m_ffStates[ind];
  }

  // Recombination identity: hash/equality over all FF states.
  virtual size_t hash() const;
  virtual size_t hash(size_t seed) const;
  virtual bool operator==(const HypothesisBase &other) const;

  // Total score incl. estimated future cost; drives stack ordering.
  virtual SCORE GetFutureScore() const = 0;
  virtual void EvaluateWhenApplied() = 0;

  virtual std::string Debug(const System &system) const = 0;

protected:
  // NOTE(review): m_mgr is NOT set by the base constructor (see
  // HypothesisBase.cpp) - subclasses must assign it.
  ManagerBase *m_mgr;
  Scores *m_scores;     // pool-allocated score vector
  FFState **m_ffStates; // one state per stateful feature function

  HypothesisBase(MemPool &pool, const System &system);
};
65
+
66
+ ////////////////////////////////////////////////////////////////////////////////////
67
+ class HypothesisFutureScoreOrderer
68
+ {
69
+ public:
70
+ bool operator()(const HypothesisBase* a, const HypothesisBase* b) const {
71
+ return a->GetFutureScore() > b->GetFutureScore();
72
+ }
73
+ };
74
+
75
+ }
76
+
mosesdecoder/moses2/HypothesisColl.cpp ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HypothesisColl.cpp
3
+ *
4
+ * Created on: 26 Feb 2016
5
+ * Author: hieu
6
+ */
7
+ #include <iostream>
8
+ #include <sstream>
9
+ #include <algorithm>
10
+ #include <boost/foreach.hpp>
11
+ #include "HypothesisColl.h"
12
+ #include "ManagerBase.h"
13
+ #include "System.h"
14
+ #include "MemPoolAllocator.h"
15
+
16
+ using namespace std;
17
+
18
+ namespace Moses2
19
+ {
20
+
21
// Construct an empty collection whose hash set allocates from the manager's
// memory pool. The sorted cache starts empty and the beam bounds start at
// the extremes (+/- infinity) so the first Add always succeeds.
HypothesisColl::HypothesisColl(const ManagerBase &mgr)
  :m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
  ,m_sortedHypos(NULL)
{
  m_bestScore = -std::numeric_limits<float>::infinity();
  m_worstScore = std::numeric_limits<float>::infinity();
}
28
+
29
+ const HypothesisBase *HypothesisColl::GetBestHypo() const
30
+ {
31
+ if (GetSize() == 0) {
32
+ return NULL;
33
+ }
34
+ if (m_sortedHypos) {
35
+ return (*m_sortedHypos)[0];
36
+ }
37
+
38
+ SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
39
+ const HypothesisBase *bestHypo;
40
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
41
+ if (hypo->GetFutureScore() > bestScore) {
42
+ bestScore = hypo->GetFutureScore();
43
+ bestHypo = hypo;
44
+ }
45
+ }
46
+ return bestHypo;
47
+ }
48
+
49
// Add a hypothesis to the stack: pre-prune if the stack has grown too big,
// apply the beam cut-off, insert with recombination, then update the beam
// bookkeeping. Discarded hypotheses are handed to the recycler (unless the
// arc lists need them for n-best output).
void HypothesisColl::Add(
  const ManagerBase &mgr,
  HypothesisBase *hypo,
  Recycler<HypothesisBase*> &hypoRecycle,
  ArcLists &arcLists)
{
  size_t maxStackSize = mgr.system.options.search.stack_size;

  // Keep the collection from growing unboundedly between sorts.
  if (GetSize() > maxStackSize * 2) {
    //cerr << "maxStackSize=" << maxStackSize << " " << GetSize() << endl;
    PruneHypos(mgr, mgr.arcLists);
  }

  SCORE futureScore = hypo->GetFutureScore();

  /*
  cerr << "scores:"
  << futureScore << " "
  << m_bestScore << " "
  << GetSize() << " "
  << endl;
  */
  if (GetSize() >= maxStackSize && futureScore < m_worstScore) {
    // beam threshold or really bad hypo that won't make the pruning cut
    // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point
    //cerr << "Discard, really bad score:" << hypo->Debug(mgr.system) << endl;
    hypoRecycle.Recycle(hypo);
    return;
  }

  StackAdd added = Add(hypo);

  size_t nbestSize = mgr.system.options.nbest.nbest_size;
  if (nbestSize) {
    // n-best output needs losing arcs kept around rather than recycled.
    arcLists.AddArc(added.added, hypo, added.other);
  } else {
    if (added.added) {
      if (added.other) {
        // Recombined: the displaced hypothesis can be reused.
        hypoRecycle.Recycle(added.other);
      }
    } else {
      // An equal-or-better hypothesis is already stored.
      hypoRecycle.Recycle(hypo);
    }
  }

  // update beam variables
  if (added.added) {
    if (futureScore > m_bestScore) {
      m_bestScore = futureScore;
      float beamWidth = mgr.system.options.search.beam_width;
      // NOTE(review): this only raises the cut-off if beamWidth is
      // negative (a floor below the best score) - confirm the config
      // default's sign.
      if ( m_bestScore + beamWidth > m_worstScore ) {
        m_worstScore = m_bestScore + beamWidth;
      }
    } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
      m_worstScore = futureScore;
    }
  }
}
107
+
108
// Insert 'hypo' into the recombination hash.
// Returns: added - whether 'hypo' now lives in the collection;
//          other - on recombination, the hypothesis that lost (else NULL).
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
  std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
  //cerr << endl << "new=" << hypo->Debug(hypo->GetManager().system) << endl;

  // CHECK RECOMBINATION
  if (addRet.second) {
    // equiv hypo doesn't exists
    //cerr << "Added " << hypo << endl;
    return StackAdd(true, NULL);
  } else {
    HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
    //cerr << "hypoExisting=" << hypoExisting->Debug(hypo->GetManager().system) << endl;

    if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
      // incoming hypo is better than the one we have
      //cerr << "Add " << hypo << "(" << hypo->hash() << ")"
      // << " discard existing " << hypoExisting << "(" << hypoExisting->hash() << ")"
      // << endl;

      // Replace the stored pointer in place via const_cast. This does not
      // break the unordered_set invariants because the two hypotheses hash
      // identically and compare equal (that is exactly why insert failed).
      const HypothesisBase * const &hypoExisting1 = *addRet.first;
      const HypothesisBase *&hypoExisting2 =
          const_cast<const HypothesisBase *&>(hypoExisting1);
      hypoExisting2 = hypo;

      return StackAdd(true, hypoExisting);
    } else {
      // already storing the best hypo. discard incoming hypo
      //cerr << "Keep existing " << hypoExisting << "(" << hypoExisting->hash() << ")"
      // << " discard new " << hypo << "(" << hypo->hash() << ")"
      // << endl;
      return StackAdd(false, hypoExisting);
    }
  }

  //assert(false);
}
145
+
146
// Lazily build (and cache in m_sortedHypos) a best-first sorted view of the
// collection, pruned to the configured stack size. Pruned hypotheses are
// recycled, and removed from the arc lists when n-best output is on. The
// cache lives until Clear().
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
  const ManagerBase &mgr,
  ArcLists &arcLists) const
{
  if (m_sortedHypos == NULL) {
    // create sortedHypos first
    MemPool &pool = mgr.GetPool();
    m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
        m_coll.size());

    SortHypos(mgr, m_sortedHypos->GetArray());

    // prune
    Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();

    size_t maxStackSize = mgr.system.options.search.stack_size;
    if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
      // Everything past position maxStackSize loses.
      for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
        HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
        recycler.Recycle(hypo);

        // delete from arclist
        if (mgr.system.options.nbest.nbest_size) {
          arcLists.Delete(hypo);
        }
      }
      m_sortedHypos->resize(maxStackSize);
    }

  }

  return *m_sortedHypos;
}
179
+
180
+ void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
181
+ {
182
+ size_t maxStackSize = mgr.system.options.search.stack_size;
183
+
184
+ Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();
185
+
186
+ const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() * sizeof(const HypothesisBase *));
187
+ SortHypos(mgr, sortedHypos);
188
+
189
+ // update worse score
190
+ m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore();
191
+
192
+ // prune
193
+ for (size_t i = maxStackSize; i < GetSize(); ++i) {
194
+ HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]);
195
+
196
+ // delete from arclist
197
+ if (mgr.system.options.nbest.nbest_size) {
198
+ arcLists.Delete(hypo);
199
+ }
200
+
201
+ // delete from collection
202
+ Delete(hypo);
203
+
204
+ recycler.Recycle(hypo);
205
+ }
206
+
207
+ }
208
+
209
+ void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const
210
+ {
211
+ size_t maxStackSize = mgr.system.options.search.stack_size;
212
+ //assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that
213
+ //assert(GetSize() > maxStackSize);
214
+ //assert(sortedHypos.size() == GetSize());
215
+
216
+ /*
217
+ cerr << "UNSORTED hypos: ";
218
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
219
+ cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
220
+ }
221
+ cerr << endl;
222
+ */
223
+ size_t ind = 0;
224
+ BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
225
+ sortedHypos[ind] = hypo;
226
+ ++ind;
227
+ }
228
+
229
+ size_t indMiddle;
230
+ if (maxStackSize == 0) {
231
+ indMiddle = GetSize();
232
+ } else if (GetSize() > maxStackSize) {
233
+ indMiddle = maxStackSize;
234
+ } else {
235
+ // GetSize() <= maxStackSize
236
+ indMiddle = GetSize();
237
+ }
238
+
239
+ const HypothesisBase **iterMiddle = sortedHypos + indMiddle;
240
+
241
+ std::partial_sort(
242
+ sortedHypos,
243
+ iterMiddle,
244
+ sortedHypos + GetSize(),
245
+ HypothesisFutureScoreOrderer());
246
+
247
+ /*
248
+ cerr << "sorted hypos: ";
249
+ for (size_t i = 0; i < sortedHypos.size(); ++i) {
250
+ const HypothesisBase *hypo = sortedHypos[i];
251
+ cerr << hypo << " ";
252
+ }
253
+ cerr << endl;
254
+ */
255
+ }
256
+
257
+ void HypothesisColl::Delete(const HypothesisBase *hypo)
258
+ {
259
+ //cerr << " Delete hypo=" << hypo << "(" << hypo->hash() << ")"
260
+ // << " m_coll=" << m_coll.size() << endl;
261
+
262
+ size_t erased = m_coll.erase(hypo);
263
+ UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo);
264
+ }
265
+
266
+ void HypothesisColl::Clear()
267
+ {
268
+ m_sortedHypos = NULL;
269
+ m_coll.clear();
270
+
271
+ m_bestScore = -std::numeric_limits<float>::infinity();
272
+ m_worstScore = std::numeric_limits<float>::infinity();
273
+ }
274
+
275
+ std::string HypothesisColl::Debug(const System &system) const
276
+ {
277
+ stringstream out;
278
+ BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
279
+ out << hypo->Debug(system);
280
+ out << std::endl << std::endl;
281
+ }
282
+
283
+ return out.str();
284
+ }
285
+
286
+ } /* namespace Moses2 */
mosesdecoder/moses2/HypothesisColl.h ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * HypothesisColl.h
3
+ *
4
+ * Created on: 26 Feb 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <unordered_set>
9
+ #include "HypothesisBase.h"
10
+ #include "MemPoolAllocator.h"
11
+ #include "Recycler.h"
12
+ #include "Array.h"
13
+ #include "legacy/Util2.h"
14
+
15
+ namespace Moses2
16
+ {
17
+
18
+ class ManagerBase;
19
+ class ArcLists;
20
+
21
+ typedef Array<const HypothesisBase*> Hypotheses;
22
+
23
+ ////////////////////////////////////////////////////
24
// A decoding stack: a pool-allocated hash set of hypotheses keyed on their
// FF states (recombination), with beam bookkeeping and a lazily built,
// cached best-first sorted view.
class HypothesisColl
{
public:
  HypothesisColl(const ManagerBase &mgr);

  // Add with recombination, beam pruning, and recycling of losers.
  void Add(const ManagerBase &mgr,
      HypothesisBase *hypo,
      Recycler<HypothesisBase*> &hypoRecycle,
      ArcLists &arcLists);

  size_t GetSize() const {
    return m_coll.size();
  }

  void Clear();

  // Sorted & pruned view; cached in m_sortedHypos until Clear().
  const Hypotheses &GetSortedAndPrunedHypos(
      const ManagerBase &mgr,
      ArcLists &arcLists) const;

  // Best-scoring hypothesis, or NULL when empty.
  const HypothesisBase *GetBestHypo() const;

  // Typed convenience wrapper around GetBestHypo().
  template<typename T>
  const T *GetBestHypo() const {
    const HypothesisBase *hypo = GetBestHypo();
    return hypo ? &hypo->Cast<T>() : NULL;
  }

  // Remove one hypothesis; throws if it is not in the collection.
  void Delete(const HypothesisBase *hypo);

  std::string Debug(const System &system) const;

protected:
  // Hash set using the hypotheses' hash()/operator== (via UnorderedComparer)
  // and allocating its nodes from the manager's memory pool.
  typedef std::unordered_set<const HypothesisBase*,
      UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
      MemPoolAllocator<const HypothesisBase*> > _HCType;

  _HCType m_coll;
  mutable Hypotheses *m_sortedHypos; // cache built by GetSortedAndPrunedHypos

  SCORE m_bestScore;  // best future score since the last Clear()
  SCORE m_worstScore; // current beam cut-off

  // Insert into the hash; reports recombination outcome.
  StackAdd Add(const HypothesisBase *hypo);

  void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists);
  void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const;

};
73
+
74
+ } /* namespace Moses2 */
75
+
mosesdecoder/moses2/InputPathsBase.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * InputPaths.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <vector>
11
+ #include "MemPool.h"
12
+
13
+ namespace Moses2
14
+ {
15
+
16
+ class InputType;
17
+ class System;
18
+ class ManagerBase;
19
+ class InputPathBase;
20
+
21
// Base container of InputPathBase objects for one input; concrete input
// types implement Init() to populate it. Iteration order is the order in
// which paths were added to m_inputPaths.
class InputPathsBase
{
  typedef std::vector<InputPathBase*> Coll;
public:
  InputPathsBase() {
  }
  // Virtual dtor defined in the .cpp - NOTE(review): presumably deletes
  // the contained paths; confirm there.
  virtual ~InputPathsBase();

  //! iterators
  typedef Coll::iterator iterator;
  typedef Coll::const_iterator const_iterator;

  const_iterator begin() const {
    return m_inputPaths.begin();
  }
  const_iterator end() const {
    return m_inputPaths.end();
  }

  iterator begin() {
    return m_inputPaths.begin();
  }
  iterator end() {
    return m_inputPaths.end();
  }

  // Create the paths for 'input'; implemented per concrete input type.
  virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;

protected:
  Coll m_inputPaths;
};
52
+
53
+ }
54
+
mosesdecoder/moses2/Main.cpp ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <memory>
3
+ #include <boost/pool/pool_alloc.hpp>
4
+ #include "Main.h"
5
+ #include "System.h"
6
+ #include "Phrase.h"
7
+ #include "TranslationTask.h"
8
+ #include "MemPoolAllocator.h"
9
+ #ifdef HAVE_XMLRPC_C
10
+ #include "server/Server.h"
11
+ #endif // HAVE_XMLRPC_C
12
+
13
+ #include "legacy/InputFileStream.h"
14
+ #include "legacy/Parameter.h"
15
+ #include "legacy/ThreadPool.h"
16
+ #include "legacy/Timer.h"
17
+ #include "legacy/Util2.h"
18
+ #include "util/usage.hh"
19
+
20
+ //#include <vld.h>
21
+
22
+ using namespace std;
23
+
24
+ //extern size_t g_numHypos;
25
+
26
+ int main(int argc, char** argv)
27
+ {
28
+ cerr << "Starting..." << endl;
29
+
30
+ Moses2::Timer timer;
31
+ timer.start();
32
+ //Temp();
33
+
34
+ Moses2::Parameter params;
35
+ if (!params.LoadParam(argc, argv)) {
36
+ return EXIT_FAILURE;
37
+ }
38
+ Moses2::System system(params);
39
+ timer.check("Loaded");
40
+
41
+ if (params.GetParam("show-weights")) {
42
+ return EXIT_SUCCESS;
43
+ }
44
+
45
+ //cerr << "system.numThreads=" << system.options.server.numThreads << endl;
46
+ Moses2::ThreadPool pool(system.options.server.numThreads, system.cpuAffinityOffset, system.cpuAffinityOffsetIncr);
47
+ //cerr << "CREATED POOL" << endl;
48
+
49
+ if (params.GetParam("server")) {
50
+ std::cerr << "RUN SERVER" << std::endl;
51
+ run_as_server(system);
52
+ }
53
+ else {
54
+ std::cerr << "RUN BATCH" << std::endl;
55
+ batch_run(params, system, pool);
56
+ }
57
+
58
+ cerr << "Decoding took " << timer.get_elapsed_time() << endl;
59
+ // cerr << "g_numHypos=" << g_numHypos << endl;
60
+ cerr << "Finished" << endl;
61
+ return EXIT_SUCCESS;
62
+ }
63
+
64
+ ////////////////////////////////////////////////////////////////////////////////////////////////
65
// Run the decoder as an XMLRPC server. Does not return in the server
// build; throws when Moses2 was compiled without xmlrpc-c support.
void run_as_server(Moses2::System& system)
{
#ifdef HAVE_XMLRPC_C
  Moses2::Server server(system.options.server, system);
  server.run(system); // actually: don't return. see Server::run()
#else
  UTIL_THROW2("Moses2 was compiled without xmlrpc-c. "
      << "No server functionality available.");
#endif
}
75
+
76
+ ////////////////////////////////////////////////////////////////////////////////////////////////
77
+ istream &GetInputStream(Moses2::Parameter &params)
78
+ {
79
+ const Moses2::PARAM_VEC *vec = params.GetParam("input-file");
80
+ if (vec && vec->size()) {
81
+ Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0));
82
+ return *stream;
83
+ } else {
84
+ return cin;
85
+ }
86
+ }
87
+
88
+ ////////////////////////////////////////////////////////////////////////////////////////////////
89
+
90
+ void batch_run(Moses2::Parameter& params, Moses2::System& system, Moses2::ThreadPool& pool)
91
+ {
92
+ istream& inStream = GetInputStream(params);
93
+
94
+ long translationId = 0;
95
+ string line;
96
+ while (getline(inStream, line)) {
97
+ //cerr << "line=" << line << endl;
98
+ boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
99
+
100
+ //cerr << "START pool.Submit()" << endl;
101
+ pool.Submit(task);
102
+ //task->Run();
103
+ ++translationId;
104
+ }
105
+
106
+ pool.Stop(true);
107
+
108
+ if (&inStream != &cin) {
109
+ delete& inStream;
110
+ }
111
+
112
+ //util::PrintUsage(std::cerr);
113
+
114
+ }
115
+
116
+ ////////////////////////////////////////////////////////////////////////////////////////////////
mosesdecoder/moses2/Main.h ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Main.h
3
+ *
4
+ * Created on: 1 Apr 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+ #include <iostream>
9
+
10
+ namespace Moses2
11
+ {
12
+ class Parameter;
13
+ class System;
14
+ class ThreadPool;
15
+ }
16
+
17
// Return cin, or a heap-allocated file stream when --input-file is set
// (the caller deletes it if it is not cin).
std::istream &GetInputStream(Moses2::Parameter &params);
// Decode every input line using the given thread pool; blocks until done.
void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::ThreadPool &pool);
// Run as an XMLRPC server (requires xmlrpc-c); does not return.
void run_as_server(Moses2::System &system);

// Scratch/debug hook - NOTE(review): only referenced commented-out in main().
void Temp();
22
+
23
+
mosesdecoder/moses2/ManagerBase.cpp ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Manager.cpp
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+ #include <boost/foreach.hpp>
8
+ #include <vector>
9
+ #include <sstream>
10
+ #include "System.h"
11
+ #include "ManagerBase.h"
12
+ #include "Phrase.h"
13
+ #include "InputPathsBase.h"
14
+ #include "InputPathBase.h"
15
+ #include "TranslationModel/PhraseTable.h"
16
+ #include "legacy/Range.h"
17
+ #include "PhraseBased/Sentence.h"
18
+
19
+ using namespace std;
20
+
21
+ namespace Moses2
22
+ {
23
// Construct a manager for one input string. The pool/recycler pointers and
// m_input stay NULL until InitPools() (and parsing) run; the destructor
// must therefore tolerate a partially initialised manager.
ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
    const std::string &inputStr, long translationId)
  :system(sys)
  ,task(task)
  ,m_inputStr(inputStr)
  ,m_translationId(translationId)
  ,m_pool(NULL)
  ,m_systemPool(NULL)
  ,m_hypoRecycler(NULL)
  ,m_input(NULL)
{
}
35
+
36
+ ManagerBase::~ManagerBase()
37
+ {
38
+ system.featureFunctions.CleanUpAfterSentenceProcessing(*m_input);
39
+
40
+ GetPool().Reset();
41
+ GetHypoRecycler().Clear();
42
+ }
43
+
44
+ void ManagerBase::InitPools()
45
+ {
46
+ m_pool = &system.GetManagerPool();
47
+ m_systemPool = &system.GetSystemPool();
48
+ m_hypoRecycler = &system.GetHypoRecycler();
49
+ //cerr << "pool size " << m_pool->Size() << " " << m_systemPool->Size() << endl;
50
+ }
51
+
52
+ }
53
+
mosesdecoder/moses2/ManagerBase.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Manager.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <queue>
11
+ #include <cstddef>
12
+ #include <string>
13
+ #include <deque>
14
+ #include "Phrase.h"
15
+ #include "MemPool.h"
16
+ #include "Recycler.h"
17
+ #include "EstimatedScores.h"
18
+ #include "ArcLists.h"
19
+ #include "legacy/Bitmaps.h"
20
+
21
+ namespace Moses2
22
+ {
23
+
24
+ class System;
25
+ class TranslationTask;
26
+ class PhraseImpl;
27
+ class SearchNormal;
28
+ class Search;
29
+ class InputType;
30
+ class OutputCollector;
31
+ class HypothesisBase;
32
+
33
// Abstract per-sentence decoding manager: owns the input, the arc lists for
// n-best extraction, and (borrowed) pools/recycler. Concrete subclasses
// implement Decode() and the Output*() methods.
class ManagerBase
{
public:
  System &system;              // global decoding system (models, options)
  const TranslationTask &task; // task this manager belongs to
  mutable ArcLists arcLists;   // recombination arcs, used for n-best output

  ManagerBase(System &sys, const TranslationTask &task,
      const std::string &inputStr, long translationId);
  virtual ~ManagerBase();
  virtual void Decode() = 0;
  virtual std::string OutputBest() const = 0;
  virtual std::string OutputNBest() = 0;
  virtual std::string OutputTransOpt() = 0;

  // Per-sentence pool. Valid only after InitPools() has been called.
  MemPool &GetPool() const {
    return *m_pool;
  }

  // Longer-lived system pool. Valid only after InitPools().
  MemPool &GetSystemPool() const {
    return *m_systemPool;
  }

  // Recycler for discarded hypotheses. Valid only after InitPools().
  Recycler<HypothesisBase*> &GetHypoRecycler() const {
    return *m_hypoRecycler;
  }

  const InputType &GetInput() const {
    return *m_input;
  }

  long GetTranslationId() const {
    return m_translationId;
  }

protected:
  std::string m_inputStr; // raw input line
  long m_translationId;
  InputType *m_input;     // parsed input; NULL until set by a subclass

  // Borrowed from System by InitPools(); NULL before that.
  mutable MemPool *m_pool, *m_systemPool;
  mutable Recycler<HypothesisBase*> *m_hypoRecycler;

  void InitPools();

};
79
+
80
+ }
81
+
mosesdecoder/moses2/MemPool.h ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * MemPool.h
3
+ *
4
+ * Created on: 28 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <algorithm>
11
+ #include <iostream>
12
+ #include <vector>
13
+ #include <stdint.h>
14
+ #include <stdlib.h>
15
+ #include <limits>
16
+ #include <iostream>
17
+
18
+ namespace Moses2
19
+ {
20
+
21
// Bump allocator over a growing list of pages. Allocation is pointer
// arithmetic; nothing is freed individually - Reset() reclaims every page
// for reuse and the destructor releases them. No constructors/destructors
// are run for allocated objects (callers use placement new when needed).
class MemPool
{
  // One contiguous allocation: [mem, end), 'size' bytes.
  struct Page {
    uint8_t *mem;
    uint8_t *end;
    size_t size;

    Page() = delete;
    Page(std::size_t size);
    ~Page();
  };

public:
  MemPool(std::size_t initSize = 10240);

  ~MemPool();

  // Hand out 'size' raw bytes, growing the pool when the page is full.
  uint8_t* Allocate(std::size_t size);

  // Space for a single T. No constructor is run.
  template<typename T>
  T *Allocate() {
    uint8_t *ret = Allocate(sizeof(T));
    return (T*) ret;
  }

  // Space for 'num' elements of T, with the per-element byte count rounded
  // up to a multiple of 16 before multiplying (over-provisioning; callers
  // still index with the natural sizeof(T) stride). The previous rounding,
  // size += size % 16, only produced a multiple of 16 when sizeof(T) was a
  // multiple of 8 (e.g. sizeof(T)==4 gave 8).
  template<typename T>
  T *Allocate(size_t num) {
    size_t size = sizeof(T);
    size_t m = size % 16;
    if (m) {
      size += 16 - m; // round up to the next multiple of 16
    }

    uint8_t *ret = Allocate(size * num);
    return (T*) ret;
  }

  // re-use pool: rewind to the first page without freeing anything
  void Reset();

  size_t Size();

private:
  // Grow: add/advance to a page big enough for 'size' bytes.
  uint8_t *More(std::size_t size);

  std::vector<Page*> m_pages;

  size_t m_currSize;  // bookkeeping for the current allocation position
  size_t m_currPage;  // index of the page currently being filled
  uint8_t *current_;  // next free byte in the current page

  // no copying
  MemPool(const MemPool &) = delete;
  MemPool &operator=(const MemPool &) = delete;
};
74
+
75
+
76
+ }
77
+
mosesdecoder/moses2/MemPoolAllocator.h ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include "MemPool.h"
3
+
4
+ namespace Moses2
5
+ {
6
+
7
// Minimal STL-style allocator that hands out memory from a MemPool.
// deallocate() and destroy() are deliberately no-ops: the pool reclaims all
// memory wholesale via MemPool::Reset(), and element destructors are never
// run - only use with types whose destructors may safely be skipped.
template<typename T>
class MemPoolAllocator
{
public:
  typedef T value_type;
  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;

  template<class U>
  struct rebind {
    typedef MemPoolAllocator<U> other;
  };

  MemPoolAllocator(Moses2::MemPool &pool) :
    m_pool(pool) {
  }
  MemPoolAllocator(const MemPoolAllocator &other) :
    m_pool(other.m_pool) {
  }

  // Rebinding copy: a rebound allocator shares the same pool.
  template<class U>
  MemPoolAllocator(const MemPoolAllocator<U>& other) :
    m_pool(other.m_pool) {
  }

  size_type max_size() const {
    return std::numeric_limits<size_type>::max();
  }

  // No-op: pool memory is reclaimed wholesale by MemPool::Reset().
  void deallocate(pointer p, size_type n) {
    //std::cerr << "deallocate " << p << " " << n << std::endl;
  }

  // NOTE(review): std::allocator<void>::const_pointer was removed in
  // C++20; fine for the standard this codebase targets.
  pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
    //std::cerr << "allocate " << n << " " << hint << std::endl;
    pointer ret = m_pool.Allocate<T>(n);
    return ret;
  }

  void construct(pointer p, const_reference val) {
    //std::cerr << "construct " << p << " " << n << std::endl;
    new ((void *) p) T(val);
  }

  // No-op by design: destructors are intentionally never run.
  void destroy(pointer p) {
    //std::cerr << "destroy " << p << " " << n << std::endl;
  }

  // return address of values
  pointer address (reference value) const {
    return &value;
  }
  const_pointer address (const_reference value) const {
    return &value;
  }

  // All instances are interchangeable for deallocation purposes (it is a
  // no-op), so every allocator compares equal.
  bool operator==(const MemPoolAllocator<T> &allocator) const {
    return true;
  }

  bool operator!=(const MemPoolAllocator<T> &allocator) const {
    return false;
  }

  // No-op: the pool reference cannot be reseated.
  MemPoolAllocator<T>& operator=(const MemPoolAllocator<T>& allocator) {
    return *this;
  }

  MemPool &m_pool;
protected:
};
82
+
83
+ }
84
+
85
+
mosesdecoder/moses2/Moses2Wrapper.h ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <string>
3
+
4
+ namespace Moses2 {
5
+ class Parameter;
6
+ class System;
7
extern "C" {
  // Status codes returned across the C API boundary.
  enum MosesApiErrorCode {
    MS_API_OK,
    MS_API_E_FAILURE,
    MS_API_E_INPUT,
    MS_API_E_TIMEOUT
  };
}
// Thin C++ facade owning one Parameter/System pair, used by the C API.
class Moses2Wrapper
{
  Parameter* m_param;
  System* m_system;

public:
  Moses2Wrapper(const std::string& filePath);
  ~Moses2Wrapper();
  // Translate 'input' (id tags the request); returns n-best output when
  // 'nbest' is set, otherwise the single best translation.
  std::string Translate(const std::string& input, long id, bool nbest);
  void UpdateLMPath(const std::string& filePath);

  // Helpers for passing strings across the C boundary. NOTE(review):
  // CopyString presumably heap-allocates and Free releases - confirm in
  // the .cpp before relying on the allocator used.
  static char* CopyString(const char* str);
  static void Free(void* ptr);
};
29
+
30
+ }
mosesdecoder/moses2/Phrase.h ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * PhraseImpl.h
3
+ *
4
+ * Created on: 23 Oct 2015
5
+ * Author: hieu
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstddef>
11
+ #include <string>
12
+ #include <sstream>
13
+ #include <iostream>
14
+ #include "Word.h"
15
+ #include "MemPool.h"
16
+ #include "TypeDef.h"
17
+ #include "legacy/FactorCollection.h"
18
+ #include "SCFG/Word.h"
19
+ #include <boost/functional/hash.hpp>
20
+
21
+ namespace Moses2
22
+ {
23
+
24
+ template<typename WORD>
25
+ class SubPhrase;
26
+
27
+ class Scores;
28
+ class PhraseTable;
29
+ class MemPool;
30
+ class System;
31
+
32
+ template<typename WORD>
33
+ class Phrase
34
+ {
35
+ public:
36
+ virtual ~Phrase() {
37
+ }
38
+ virtual const WORD& operator[](size_t pos) const = 0;
39
+ virtual size_t GetSize() const = 0;
40
+
41
+ virtual const WORD& Back() const {
42
+ assert(GetSize());
43
+ return (*this)[GetSize() - 1];
44
+ }
45
+
46
+ virtual size_t hash() const {
47
+ size_t seed = 0;
48
+
49
+ for (size_t i = 0; i < GetSize(); ++i) {
50
+ const WORD &word = (*this)[i];
51
+ size_t wordHash = word.hash();
52
+ boost::hash_combine(seed, wordHash);
53
+ }
54
+
55
+ return seed;
56
+ }
57
+
58
+ virtual bool operator==(const Phrase &compare) const {
59
+ if (GetSize() != compare.GetSize()) {
60
+ return false;
61
+ }
62
+
63
+ for (size_t i = 0; i < GetSize(); ++i) {
64
+ const WORD &word = (*this)[i];
65
+ const WORD &otherWord = compare[i];
66
+ if (word != otherWord) {
67
+ return false;
68
+ }
69
+ }
70
+
71
+ return true;
72
+ }
73
+
74
+ virtual bool operator!=(const Phrase &compare) const {
75
+ return !((*this) == compare);
76
+ }
77
+
78
+ virtual std::string GetString(const FactorList &factorTypes) const {
79
+ if (GetSize() == 0) {
80
+ return "";
81
+ }
82
+
83
+ std::stringstream ret;
84
+
85
+ const WORD &word = (*this)[0];
86
+ ret << word.GetString(factorTypes);
87
+ for (size_t i = 1; i < GetSize(); ++i) {
88
+ const WORD &word = (*this)[i];
89
+ ret << " " << word.GetString(factorTypes);
90
+ }
91
+ return ret.str();
92
+ }
93
+
94
+ virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
95
+
96
+ virtual std::string Debug(const System &system) const {
97
+ std::stringstream out;
98
+ size_t size = GetSize();
99
+ if (size) {
100
+ out << (*this)[0].Debug(system);
101
+ for (size_t i = 1; i < size; ++i) {
102
+ const WORD &word = (*this)[i];
103
+ out << " " << word.Debug(system);
104
+ }
105
+ }
106
+
107
+ return out.str();
108
+ }
109
+
110
+ virtual void OutputToStream(const System &system, std::ostream &out) const {
111
+ size_t size = GetSize();
112
+ if (size) {
113
+ (*this)[0].OutputToStream(system, out);
114
+ for (size_t i = 1; i < size; ++i) {
115
+ const WORD &word = (*this)[i];
116
+ out << " ";
117
+ word.OutputToStream(system, out);
118
+ }
119
+ }
120
+ }
121
+
122
+
123
+ };
124
+
125
+ ////////////////////////////////////////////////////////////////////////
126
+ template<typename WORD>
127
+ class PhraseOrdererLexical
128
+ {
129
+ public:
130
+ bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const {
131
+ size_t minSize = std::min(a.GetSize(), b.GetSize());
132
+ for (size_t i = 0; i < minSize; ++i) {
133
+ const Word &aWord = a[i];
134
+ const Word &bWord = b[i];
135
+ int cmp = aWord.Compare(bWord);
136
+ //std::cerr << "WORD: " << aWord << " ||| " << bWord << " ||| " << lessThan << std::endl;
137
+ if (cmp) {
138
+ return (cmp < 0);
139
+ }
140
+ }
141
+ return a.GetSize() < b.GetSize();
142
+ }
143
+ };
144
+
145
+ }
146
+
mosesdecoder/moses2/Recycler.cpp ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Recycler.cpp
 *
 * Created on: 2 Jan 2016
 * Author: hieu
 */

#include "Recycler.h"

// Recycler's implementation presumably lives entirely in the header
// (confirm in Recycler.h); this translation unit intentionally defines
// nothing and exists only so the build has a .cpp for the component.
namespace Moses2
{

} /* namespace Moses2 */
mosesdecoder/moses2/Scores.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Scores.h
 *
 * Created on: 23 Oct 2015
 * Author: hieu
 */

#pragma once
#include <iostream>
#include <string>
#include "TypeDef.h"
#include "MemPool.h"
// NOTE(review): std::vector is used below but <vector> is only reached
// transitively through TypeDef.h — confirm before removing that include.

namespace Moses2
{

class FeatureFunction;
class FeatureFunctions;
class System;

// Flat per-feature score vector for a hypothesis/phrase, allocated from a
// MemPool, plus a cached weighted total score.
class Scores
{
public:
  // Construct with numScores score slots.
  Scores(const System &system, MemPool &pool, size_t numScores);
  // Construct with numScores slots, copying from origScores.
  Scores(const System &system, MemPool &pool, size_t numScores,
      const Scores &origScores);

  virtual ~Scores();

  // Cached weighted total over all features.
  SCORE GetTotalScore() const {
    return m_total;
  }

  // Scores belonging to the given feature function (pointer into m_scores).
  const SCORE *GetScores(const FeatureFunction &featureFunction) const;

  // Reset all scores (and the total) — see .cpp for exact semantics.
  void Reset(const System &system);

  // Parse scores for one feature from str; transformScores presumably
  // applies the usual log transform — confirm in the .cpp.
  void CreateFromString(const std::string &str,
      const FeatureFunction &featureFunction, const System &system,
      bool transformScores);

  // PlusEquals overloads: accumulate score(s) into the feature's slot(s).
  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score);

  // Accumulate into the slot at the given offset within the feature.
  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score, size_t offset);

  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      const std::vector<SCORE> &scores);

  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
      SCORE scores[]);

  // Accumulate another full score vector.
  void PlusEquals(const System &system, const Scores &scores);

  // Subtract another full score vector.
  void MinusEquals(const System &system, const Scores &scores);

  // Assign overloads: overwrite rather than accumulate.
  void Assign(const System &system, const FeatureFunction &featureFunction,
      const SCORE &score);

  void Assign(const System &system, const FeatureFunction &featureFunction,
      const std::vector<SCORE> &scores);

  std::string Debug(const System &system) const;

  // Write the per-feature score breakdown to out.
  void OutputBreakdown(std::ostream &out, const System &system) const;

  // static functions to work out estimated scores
  static SCORE CalcWeightedScore(const System &system,
      const FeatureFunction &featureFunction, SCORE scores[]);

  static SCORE CalcWeightedScore(const System &system,
      const FeatureFunction &featureFunction, SCORE score);

protected:
  SCORE *m_scores;  // per-feature score slots (pool-allocated)
  SCORE m_total;    // cached weighted total
};

}
81
+
mosesdecoder/moses2/SubPhrase.h ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <sstream>
3
+ #include "Phrase.h"
4
+ #include "Word.h"
5
+ #include "SCFG/Word.h"
6
+
7
+ namespace Moses2
8
+ {
9
+ class System;
10
+
11
+ template<typename WORD>
12
+ class SubPhrase: public Phrase<WORD>
13
+ {
14
+ public:
15
+ SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
16
+ :m_origPhrase(&origPhrase)
17
+ ,m_start(start)
18
+ ,m_size(size)
19
+ {}
20
+
21
+ virtual const WORD& operator[](size_t pos) const {
22
+ return (*m_origPhrase)[pos + m_start];
23
+ }
24
+
25
+ virtual size_t GetSize() const {
26
+ return m_size;
27
+ }
28
+
29
+ SubPhrase GetSubPhrase(size_t start, size_t size) const {
30
+ SubPhrase ret(*m_origPhrase, m_start + start, size);
31
+ return ret;
32
+ }
33
+
34
+ virtual std::string Debug(const System &system) const {
35
+ std::stringstream out;
36
+ if (GetSize()) {
37
+ out << (*this)[0].Debug(system);
38
+ for (size_t i = 1; i < GetSize(); ++i) {
39
+ const WORD &word = (*this)[i];
40
+ out << " " << word.Debug(system);
41
+ }
42
+ }
43
+
44
+ return out.str();
45
+ }
46
+
47
+ protected:
48
+ const Phrase<WORD> *m_origPhrase;
49
+ size_t m_start, m_size;
50
+ };
51
+
52
+
53
+ }
54
+
mosesdecoder/moses2/TargetPhrase.cpp ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * TargetPhrase.cpp
 *
 * Created on: 26 Apr 2016
 * Author: hieu
 */

#include "TargetPhrase.h"
#include "System.h"
#include "Scores.h"

// TargetPhrase's implementation presumably lives in the header (confirm
// in TargetPhrase.h); this translation unit intentionally defines nothing
// and exists only so the build has a .cpp for the component.
namespace Moses2
{

} /* namespace Moses2 */
mosesdecoder/moses2/TranslationTask.cpp ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "TranslationTask.h"
2
+ #include "System.h"
3
+ #include "InputType.h"
4
+ #include "PhraseBased/Manager.h"
5
+ #include "SCFG/Manager.h"
6
+
7
+ using namespace std;
8
+
9
+ namespace Moses2
10
+ {
11
+
12
+ TranslationTask::TranslationTask(System &system,
13
+ const std::string &line,
14
+ long translationId)
15
+ {
16
+ if (system.isPb) {
17
+ m_mgr = new Manager(system, *this, line, translationId);
18
+ } else {
19
+ m_mgr = new SCFG::Manager(system, *this, line, translationId);
20
+ }
21
+ }
22
+
23
+ TranslationTask::~TranslationTask()
24
+ {
25
+ }
26
+
27
+ std::string TranslationTask::ReturnTranslation(bool nbest) const
28
+ {
29
+ m_mgr->Decode();
30
+ string out;
31
+ if (nbest) {
32
+ out = m_mgr->OutputNBest() + "\n";
33
+ }
34
+ else {
35
+ out = m_mgr->OutputBest() + "\n";
36
+ }
37
+ delete m_mgr;
38
+ return out;
39
+ }
40
+
41
+ void TranslationTask::Run()
42
+ {
43
+
44
+ m_mgr->Decode();
45
+
46
+ string out;
47
+
48
+ out = m_mgr->OutputBest() + "\n";
49
+ m_mgr->system.bestCollector->Write(m_mgr->GetTranslationId(), out);
50
+
51
+ if (m_mgr->system.options.nbest.nbest_size) {
52
+ out = m_mgr->OutputNBest();
53
+ m_mgr->system.nbestCollector->Write(m_mgr->GetTranslationId(), out);
54
+ }
55
+
56
+ if (!m_mgr->system.options.output.detailed_transrep_filepath.empty()) {
57
+ out = m_mgr->OutputTransOpt();
58
+ m_mgr->system.detailedTranslationCollector->Write(m_mgr->GetTranslationId(), out);
59
+ }
60
+
61
+ delete m_mgr;
62
+ }
63
+
64
+ }
65
+
mosesdecoder/moses2/TrellisPaths.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * TrellisPaths.h
3
+ *
4
+ * Created on: 16 Mar 2016
5
+ * Author: hieu
6
+ */
7
+ #pragma once
8
+
9
+ #include <vector>
10
+ #include <queue>
11
+ #include "PhraseBased/TrellisPath.h"
12
+
13
+ namespace Moses2
14
+ {
15
+
16
+ template<typename T>
17
+ struct CompareTrellisPath {
18
+ bool operator()(const T* pathA, const T* pathB) const {
19
+ return (pathA->GetFutureScore() < pathB->GetFutureScore());
20
+ }
21
+ };
22
+
23
+ template<typename T>
24
+ class TrellisPaths
25
+ {
26
+ public:
27
+ TrellisPaths() {}
28
+
29
+ virtual ~TrellisPaths() {
30
+ while (!empty()) {
31
+ T *path = Get();
32
+ delete path;
33
+ }
34
+ }
35
+
36
+ bool empty() const {
37
+ return m_coll.empty();
38
+ }
39
+
40
+ //! add a new entry into collection
41
+ void Add(T *trellisPath) {
42
+ m_coll.push(trellisPath);
43
+ }
44
+
45
+ T *Get() {
46
+ T *top = m_coll.top();
47
+
48
+ // Detach
49
+ m_coll.pop();
50
+ return top;
51
+ }
52
+
53
+ size_t GetSize() const {
54
+ return m_coll.size();
55
+ }
56
+
57
+ protected:
58
+ typedef std::priority_queue<T*, std::vector<T*>,
59
+ CompareTrellisPath<T> > CollectionType;
60
+ CollectionType m_coll;
61
+ };
62
+
63
+ } /* namespace Moses2 */
64
+
mosesdecoder/moses2/TypeDef.h ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * TypeDef.h
 *
 * Created on: 23 Oct 2015
 * Author: hieu
 */
#pragma once

#include <cstddef>
#include <vector>
#include <istream>
#include "Vector.h"

namespace Moses2
{

class HypothesisBase;

// Sentinel for "index not found".
// NOTE(review): relies on <limits> being reached transitively at each
// point of use — confirm before adding new uses.
#define NOT_FOUND std::numeric_limits<size_t>::max()
// Decoder-wide defaults; presumably overridable via configuration —
// confirm against the Parameter/options handling.
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
const size_t DEFAULT_MAX_CHART_SPAN = 20;
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;

const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
// Floor value for scores.
const float LOWEST_SCORE = -100.0f;

const float DEFAULT_BEAM_WIDTH = 0.00001f;
const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;

#ifndef BOS_
#define BOS_ "<s>" //Beginning of sentence symbol
#endif
#ifndef EOS_
#define EOS_ "</s>" //End of sentence symbol
#endif

// Index of a factor within a word.
typedef size_t FactorType;
// Scalar score type used throughout the decoder.
typedef float SCORE;
typedef std::vector<FactorType> FactorList;

// Note: StaticData uses SearchAlgorithm to determine whether the translation
// model is phrase-based or syntax-based. If you add a syntax-based search
// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
  Normal = 0, CubePruning = 1,
  //,CubeGrowing = 2
  CYKPlus = 3,
  NormalBatch = 4,
  ChartIncremental = 5,
  SyntaxS2T = 6,
  SyntaxT2S = 7,
  SyntaxT2S_SCFG = 8,
  SyntaxF2S = 9,
  CubePruningPerMiniStack = 10,
  CubePruningPerBitmap = 11,
  CubePruningCardinalStack = 12,
  CubePruningBitmapStack = 13,
  CubePruningMiniStack = 14,
  DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};

// Kind of input the decoder consumes. Numeric values are part of the
// configuration format — do not renumber.
enum InputTypeEnum {
  SentenceInput = 0,
  ConfusionNetworkInput = 1,
  WordLatticeInput = 2,
  TreeInputType = 3,
  //,WordLatticeInput2 = 4,
  TabbedSentenceInput = 5,
  ForestInputType = 6,
  SentenceInputWithCandidates = 7,
};

// How inline XML markup in the input is treated.
enum XmlInputType {
  XmlPassThrough = 0,
  XmlIgnore = 1,
  XmlExclusive = 2,
  XmlInclusive = 3,
  XmlConstraint = 4
};

// Whether/how word alignments are sorted on output.
enum WordAlignmentSort {
  NoSort = 0,
  TargetOrder = 1
};

// Parsing algorithm for string-to-tree decoding.
enum S2TParsingAlgorithm {
  RecursiveCYKPlus,
  Scope3
};

// Policy for overlapping source labels.
enum SourceLabelOverlap {
  SourceLabelOverlapAdd = 0,
  SourceLabelOverlapReplace = 1,
  SourceLabelOverlapDiscard = 2
};

/////////////////////////
// MOSES2 only
104
+
105
+ class StackAdd
106
+ {
107
+ public:
108
+ bool added;
109
+ HypothesisBase *other;
110
+
111
+ StackAdd() {
112
+ }
113
+ StackAdd(bool vadded, HypothesisBase *vOther) :
114
+ added(vadded), other(vOther) {
115
+ }
116
+ };
117
+
118
class Hypothesis;
// A Vector of hypotheses processed together as one batch.
typedef Vector<Hypothesis*> Batch;

class Factor;
// A sequence of factors forming a context — presumably preceding words
// for LM-style scoring; confirm against usage.
typedef std::vector<const Factor*> Context;

}
125
+
mosesdecoder/moses2/Vector.cpp ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/*
 * Vector.cpp
 *
 * Created on: 7 Dec 2015
 * Author: hieu
 */

#include "Vector.h"

// Vector is presumably a header-only template (confirm in Vector.h);
// this translation unit intentionally defines nothing and exists only so
// the build has a .cpp for the component.
namespace Moses2
{

}
14
+