diff --git a/fairseq-0.10.2/fairseq_cli/preprocess.py b/fairseq-0.10.2/fairseq_cli/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa77da8dba74e3e07cadfc66abf8fb5fe7bddd6c
--- /dev/null
+++ b/fairseq-0.10.2/fairseq_cli/preprocess.py
@@ -0,0 +1,398 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data pre-processing: build vocabularies and binarize training data.
+"""
+
+import logging
+import os
+import shutil
+import sys
+from collections import Counter
+from itertools import zip_longest
+from multiprocessing import Pool
+
+from fairseq import options, tasks, utils
+from fairseq.binarizer import Binarizer
+from fairseq.data import indexed_dataset
+
+
+logging.basicConfig(
+    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=os.environ.get("LOGLEVEL", "INFO").upper(),
+    stream=sys.stdout,
+)
+logger = logging.getLogger("fairseq_cli.preprocess")
+
+
+def main(args):
+    utils.import_user_module(args)
+
+    os.makedirs(args.destdir, exist_ok=True)
+
+    logger.addHandler(
+        logging.FileHandler(
+            filename=os.path.join(args.destdir, "preprocess.log"),
+        )
+    )
+    logger.info(args)
+
+    task = tasks.get_task(args.task)
+
+    def train_path(lang):
+        return "{}{}".format(args.trainpref, ("." + lang) if lang else "")
+
+    def file_name(prefix, lang):
+        fname = prefix
+        if lang is not None:
+            fname += ".{lang}".format(lang=lang)
+        return fname
+
+    def dest_path(prefix, lang):
+        return os.path.join(args.destdir, file_name(prefix, lang))
+
+    def dict_path(lang):
+        return dest_path("dict", lang) + ".txt"
+
+    def build_dictionary(filenames, src=False, tgt=False):
+        assert src ^ tgt
+        return task.build_dictionary(
+            filenames,
+            workers=args.workers,
+            threshold=args.thresholdsrc if src else args.thresholdtgt,
+            nwords=args.nwordssrc if src else args.nwordstgt,
+            padding_factor=args.padding_factor,
+        )
+
+    target = not args.only_source
+
+    if not args.srcdict and os.path.exists(dict_path(args.source_lang)):
+        raise FileExistsError(dict_path(args.source_lang))
+    if target and not args.tgtdict and os.path.exists(dict_path(args.target_lang)):
+        raise FileExistsError(dict_path(args.target_lang))
+
+    if args.joined_dictionary:
+        assert (
+            not args.srcdict or not args.tgtdict
+        ), "cannot use both --srcdict and --tgtdict with --joined-dictionary"
+
+        if args.srcdict:
+            src_dict = task.load_dictionary(args.srcdict)
+        elif args.tgtdict:
+            src_dict = task.load_dictionary(args.tgtdict)
+        else:
+            assert (
+                args.trainpref
+            ), "--trainpref must be set if --srcdict is not specified"
+            src_dict = build_dictionary(
+                {train_path(lang) for lang in [args.source_lang, args.target_lang]},
+                src=True,
+            )
+        tgt_dict = src_dict
+    else:
+        if args.srcdict:
+            src_dict = task.load_dictionary(args.srcdict)
+        else:
+            assert (
+                args.trainpref
+            ), "--trainpref must be set if --srcdict is not specified"
+            src_dict = build_dictionary([train_path(args.source_lang)], src=True)
+
+        if target:
+            if args.tgtdict:
+                tgt_dict = task.load_dictionary(args.tgtdict)
+            else:
+                assert (
+                    args.trainpref
+                ), "--trainpref must be set if --tgtdict is not specified"
+                tgt_dict = build_dictionary([train_path(args.target_lang)], tgt=True)
+        else:
+            tgt_dict = None
+
+    src_dict.save(dict_path(args.source_lang))
+    if target and tgt_dict is not None:
+        tgt_dict.save(dict_path(args.target_lang))
+
+    def make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers):
+        logger.info("[{}] Dictionary: {} types".format(lang, len(vocab)))
+        n_seq_tok = [0, 0]  # running [nseq, ntok] totals; a list so merge_result can mutate it
+        replaced = Counter()
+
+        def merge_result(worker_result):
+            replaced.update(worker_result["replaced"])
+            n_seq_tok[0] += worker_result["nseq"]
+            n_seq_tok[1] += worker_result["ntok"]
+
+        input_file = "{}{}".format(
+            input_prefix, ("." + lang) if lang is not None else ""
+        )
+        offsets = Binarizer.find_offsets(input_file, num_workers)
+        pool = None
+        if num_workers > 1:
+            pool = Pool(processes=num_workers - 1)  # chunk 0 is binarized in this process below
+            for worker_id in range(1, num_workers):
+                prefix = "{}{}".format(output_prefix, worker_id)
+                pool.apply_async(
+                    binarize,
+                    (
+                        args,
+                        input_file,
+                        vocab,
+                        prefix,
+                        lang,
+                        offsets[worker_id],
+                        offsets[worker_id + 1],
+                    ),
+                    callback=merge_result,
+                )
+            pool.close()
+
+        ds = indexed_dataset.make_builder(
+            dataset_dest_file(args, output_prefix, lang, "bin"),
+            impl=args.dataset_impl,
+            vocab_size=len(vocab),
+        )
+        merge_result(
+            Binarizer.binarize(
+                input_file, vocab, lambda t: ds.add_item(t), offset=0, end=offsets[1]
+            )
+        )
+        if num_workers > 1:
+            pool.join()
+            for worker_id in range(1, num_workers):
+                prefix = "{}{}".format(output_prefix, worker_id)
+                temp_file_path = dataset_dest_prefix(args, prefix, lang)
+                ds.merge_file_(temp_file_path)
+                os.remove(indexed_dataset.data_file_path(temp_file_path))  # temp shard already merged
+                os.remove(indexed_dataset.index_file_path(temp_file_path))
+
+        ds.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))
+
+        logger.info(
+            "[{}] {}: {} sents, {} tokens, {:.3}% replaced by {}".format(
+                lang,
+                input_file,
+                n_seq_tok[0],
+                n_seq_tok[1],
+                100 * sum(replaced.values()) / max(n_seq_tok[1], 1),  # avoid ZeroDivisionError on empty input
+                vocab.unk_word,
+            )
+        )
+
+    def make_binary_alignment_dataset(input_prefix, output_prefix, num_workers):
+        nseq = [0]
+
+        def merge_result(worker_result):
+            nseq[0] += worker_result["nseq"]
+
+        input_file = input_prefix
+        offsets = Binarizer.find_offsets(input_file, num_workers)
+        pool = None
+        if num_workers > 1:
+            pool = Pool(processes=num_workers - 1)
+            for worker_id in range(1, num_workers):
+                prefix = "{}{}".format(output_prefix, worker_id)
+                pool.apply_async(
+                    binarize_alignments,
+                    (
+                        args,
+                        input_file,
+                        utils.parse_alignment,
+                        prefix,
+                        offsets[worker_id],
+                        offsets[worker_id + 1],
+                    ),
+                    callback=merge_result,
+                )
+            pool.close()
+
+        ds = indexed_dataset.make_builder(
+            dataset_dest_file(args, output_prefix, None, "bin"), impl=args.dataset_impl
+        )
+
+        merge_result(
+            Binarizer.binarize_alignments(
+                input_file,
+                utils.parse_alignment,
+                lambda t: ds.add_item(t),
+                offset=0,
+                end=offsets[1],
+            )
+        )
+        if num_workers > 1:
+            pool.join()
+            for worker_id in range(1, num_workers):
+                prefix = "{}{}".format(output_prefix, worker_id)
+                temp_file_path = dataset_dest_prefix(args, prefix, None)
+                ds.merge_file_(temp_file_path)
+                os.remove(indexed_dataset.data_file_path(temp_file_path))
+                os.remove(indexed_dataset.index_file_path(temp_file_path))
+
+        ds.finalize(dataset_dest_file(args, output_prefix, None, "idx"))
+
+        logger.info("[alignments] {}: parsed {} alignments".format(input_file, nseq[0]))
+
+    def make_dataset(vocab, input_prefix, output_prefix, lang, num_workers=1):
+        if args.dataset_impl == "raw":
+            # Copy original text file to destination folder
+            output_text_file = dest_path(
+                output_prefix + ".{}-{}".format(args.source_lang, args.target_lang),
+                lang,
+            )
+            shutil.copyfile(file_name(input_prefix, lang), output_text_file)
+        else:
+            make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers)
+
+    def make_all(lang, vocab):
+        if args.trainpref:
+            make_dataset(vocab, args.trainpref, "train", lang, num_workers=args.workers)
+        if args.validpref:
+            for k, validpref in enumerate(args.validpref.split(",")):
+                outprefix = "valid{}".format(k) if k > 0 else "valid"
+                make_dataset(
+                    vocab, validpref, outprefix, lang, num_workers=args.workers
+                )
+        if args.testpref:
+            for k, testpref in enumerate(args.testpref.split(",")):
+                outprefix = "test{}".format(k) if k > 0 else "test"
+                make_dataset(vocab, testpref, outprefix, lang, num_workers=args.workers)
+
+    def make_all_alignments():
+        if args.trainpref and os.path.exists(args.trainpref + "." + args.align_suffix):
+            make_binary_alignment_dataset(
+                args.trainpref + "." + args.align_suffix,
+                "train.align",
+                num_workers=args.workers,
+            )
+        if args.validpref and os.path.exists(args.validpref + "." + args.align_suffix):
+            make_binary_alignment_dataset(
+                args.validpref + "." + args.align_suffix,
+                "valid.align",
+                num_workers=args.workers,
+            )
+        if args.testpref and os.path.exists(args.testpref + "." + args.align_suffix):
+            make_binary_alignment_dataset(
+                args.testpref + "." + args.align_suffix,
+                "test.align",
+                num_workers=args.workers,
+            )
+
+    make_all(args.source_lang, src_dict)
+    if target:
+        make_all(args.target_lang, tgt_dict)
+    if args.align_suffix:
+        make_all_alignments()
+
+    logger.info("Wrote preprocessed data to {}".format(args.destdir))
+
+    if args.alignfile:
+        assert args.trainpref, "--trainpref must be set if --alignfile is specified"
+        src_file_name = train_path(args.source_lang)
+        tgt_file_name = train_path(args.target_lang)
+        freq_map = {}
+        with open(args.alignfile, "r", encoding="utf-8") as align_file:
+            with open(src_file_name, "r", encoding="utf-8") as src_file:
+                with open(tgt_file_name, "r", encoding="utf-8") as tgt_file:
+                    for a, s, t in zip_longest(align_file, src_file, tgt_file):
+                        si = src_dict.encode_line(s, add_if_not_exist=False)
+                        ti = tgt_dict.encode_line(t, add_if_not_exist=False)
+                        ai = list(map(lambda x: tuple(x.split("-")), a.split()))
+                        for sai, tai in ai:
+                            srcidx = si[int(sai)]
+                            tgtidx = ti[int(tai)]
+                            if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk():
+                                assert srcidx != src_dict.pad()
+                                assert srcidx != src_dict.eos()
+                                assert tgtidx != tgt_dict.pad()
+                                assert tgtidx != tgt_dict.eos()
+
+                                if srcidx not in freq_map:
+                                    freq_map[srcidx] = {}
+                                if tgtidx not in freq_map[srcidx]:
+                                    freq_map[srcidx][tgtidx] = 1
+                                else:
+                                    freq_map[srcidx][tgtidx] += 1
+
+        align_dict = {}
+        for srcidx in freq_map.keys():
+            align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get)
+
+        with open(
+            os.path.join(
+                args.destdir,
+                "alignment.{}-{}.txt".format(args.source_lang, args.target_lang),
+            ),
+            "w",
+            encoding="utf-8",
+        ) as f:
+            for k, v in align_dict.items():
+                print("{} {}".format(src_dict[k], tgt_dict[v]), file=f)
+
+
+def binarize(args, filename, vocab, output_prefix, lang, offset, end, append_eos=True):
+    ds = indexed_dataset.make_builder(
+        dataset_dest_file(args, output_prefix, lang, "bin"),
+        impl=args.dataset_impl,
+        vocab_size=len(vocab),
+    )
+
+    def consumer(tensor):
+        ds.add_item(tensor)
+
+    res = Binarizer.binarize(
+        filename, vocab, consumer, append_eos=append_eos, offset=offset, end=end
+    )
+    ds.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))
+    return res
+
+
+def binarize_alignments(args, filename, parse_alignment, output_prefix, offset, end):
+    ds = indexed_dataset.make_builder(
+        dataset_dest_file(args, output_prefix, None, "bin"),
+        impl=args.dataset_impl,
+        vocab_size=None,
+    )
+
+    def consumer(tensor):
+        ds.add_item(tensor)
+
+    res = Binarizer.binarize_alignments(
+        filename, parse_alignment, consumer, offset=offset, end=end
+    )
+    ds.finalize(dataset_dest_file(args, output_prefix, None, "idx"))
+    return res
+
+
+def dataset_dest_prefix(args, output_prefix, lang):
+    base = "{}/{}".format(args.destdir, output_prefix)
+    if lang is not None:
+        lang_part = ".{}-{}.{}".format(args.source_lang, args.target_lang, lang)
+    elif args.only_source:
+        lang_part = ""
+    else:
+        lang_part = ".{}-{}".format(args.source_lang, args.target_lang)
+
+    return "{}{}".format(base, lang_part)
+
+
+def dataset_dest_file(args, output_prefix, lang, extension):
+    base = dataset_dest_prefix(args, output_prefix, lang)
+    return "{}.{}".format(base, extension)
+
+
+def get_offsets(input_file, num_workers):
+    return Binarizer.find_offsets(input_file, num_workers)
+
+
+def cli_main():
+    parser = options.get_preprocessing_parser()
+    args = parser.parse_args()
+    main(args)
+
+
+if __name__ == "__main__":
+    cli_main()
diff --git a/fairseq-0.10.2/fairseq_cli/score.py b/fairseq-0.10.2/fairseq_cli/score.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8354eb95a8b786c0e21b8dfe777f36af6f261a3
--- /dev/null
+++ b/fairseq-0.10.2/fairseq_cli/score.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+BLEU scoring of generated translations against reference translations.
+"""
+
+import argparse
+import os
+import sys
+
+from fairseq.data import dictionary
+from fairseq.scoring import bleu
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        description="Command-line script for BLEU scoring."
+    )
+    # fmt: off
+    parser.add_argument('-s', '--sys', default='-', help='system output')
+    parser.add_argument('-r', '--ref', required=True, help='references')
+    parser.add_argument('-o', '--order', default=4, metavar='N',
+                        type=int, help='consider ngrams up to this order')
+    parser.add_argument('--ignore-case', action='store_true',
+                        help='case-insensitive scoring')
+    parser.add_argument('--sacrebleu', action='store_true',
+                        help='score with sacrebleu')
+    parser.add_argument('--sentence-bleu', action='store_true',
+                        help='report sentence-level BLEUs (i.e., with +1 smoothing)')
+    # fmt: on
+    return parser
+
+
+def cli_main():
+    parser = get_parser()
+    args = parser.parse_args()
+    print(args)
+
+    assert args.sys == "-" or os.path.exists(
+        args.sys
+    ), "System output file {} does not exist".format(args.sys)
+    assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref)
+
+    dict = dictionary.Dictionary()
+
+    def readlines(fd):
+        for line in fd.readlines():
+            if args.ignore_case:
+                yield line.lower()
+            else:
+                yield line
+
+    if args.sacrebleu:
+        import sacrebleu
+
+        def score(fdsys):
+            with open(args.ref) as fdref:
+                print(sacrebleu.corpus_bleu(fdsys, [fdref]))
+
+    elif args.sentence_bleu:
+
+        def score(fdsys):
+            with open(args.ref) as fdref:
+                scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
+                for i, (sys_tok, ref_tok) in enumerate(
+                    zip(readlines(fdsys), readlines(fdref))
+                ):
+                    scorer.reset(one_init=True)
+                    sys_tok = dict.encode_line(sys_tok)
+                    ref_tok = dict.encode_line(ref_tok)
+                    scorer.add(ref_tok, sys_tok)
+                    print(i, scorer.result_string(args.order))
+
+    else:
+
+        def score(fdsys):
+            with open(args.ref) as fdref:
+                scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
+                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
+                    sys_tok = dict.encode_line(sys_tok)
+                    ref_tok = dict.encode_line(ref_tok)
+                    scorer.add(ref_tok, sys_tok)
+                print(scorer.result_string(args.order))
+
+    if args.sys == "-":
+        score(sys.stdin)
+    else:
+        with open(args.sys, "r") as f:
+            score(f)
+
+
+if __name__ == "__main__":
+    cli_main()
diff --git a/fairseq-0.10.2/tests/test_backtranslation_dataset.py b/fairseq-0.10.2/tests/test_backtranslation_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..dffc3b49387dfdc046ea23d7db179377040b7cbc
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_backtranslation_dataset.py
@@ -0,0 +1,123 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import tests.utils as test_utils
+import torch
+from fairseq.data import (
+    BacktranslationDataset,
+    LanguagePairDataset,
+    TransformEosDataset,
+)
+from fairseq.sequence_generator import SequenceGenerator
+
+
+class TestBacktranslationDataset(unittest.TestCase):
+    def setUp(self):
+        (
+            self.tgt_dict,
+            self.w1,
+            self.w2,
+            self.src_tokens,
+            self.src_lengths,
+            self.model,
+        ) = test_utils.sequence_generator_setup()
+
+        dummy_src_samples = self.src_tokens
+
+        self.tgt_dataset = test_utils.TestDataset(data=dummy_src_samples)
+        self.cuda = torch.cuda.is_available()
+
+    def _backtranslation_dataset_helper(
+        self,
+        remove_eos_from_input_src,
+        remove_eos_from_output_src,
+    ):
+        tgt_dataset = LanguagePairDataset(
+            src=self.tgt_dataset,
+            src_sizes=self.tgt_dataset.sizes,
+            src_dict=self.tgt_dict,
+            tgt=None,
+            tgt_sizes=None,
+            tgt_dict=None,
+        )
+
+        generator = SequenceGenerator(
+            [self.model],
+            tgt_dict=self.tgt_dict,
+            max_len_a=0,
+            max_len_b=200,
+            beam_size=2,
+            unk_penalty=0,
+        )
+
+        backtranslation_dataset = BacktranslationDataset(
+            tgt_dataset=TransformEosDataset(
+                dataset=tgt_dataset,
+                eos=self.tgt_dict.eos(),
+                # remove eos from the input src
+                remove_eos_from_src=remove_eos_from_input_src,
+            ),
+            src_dict=self.tgt_dict,
+            backtranslation_fn=(
+                lambda sample: generator.generate([self.model], sample)
+            ),
+            output_collater=TransformEosDataset(
+                dataset=tgt_dataset,
+                eos=self.tgt_dict.eos(),
+                # if we remove eos from the input src, then we need to add it
+                # back to the output tgt
+                append_eos_to_tgt=remove_eos_from_input_src,
+                remove_eos_from_src=remove_eos_from_output_src,
+            ).collater,
+            cuda=self.cuda,
+        )
+        dataloader = torch.utils.data.DataLoader(
+            backtranslation_dataset,
+            batch_size=2,
+            collate_fn=backtranslation_dataset.collater,
+        )
+        backtranslation_batch_result = next(iter(dataloader))
+
+        eos, pad, w1, w2 = self.tgt_dict.eos(), self.tgt_dict.pad(), self.w1, self.w2
+
+        # Note that we sort by src_lengths and add left padding, so actually
+        # ids will look like: [1, 0]
+        expected_src = torch.LongTensor([[w1, w2, w1, eos], [pad, pad, w1, eos]])
+        if remove_eos_from_output_src:
+            expected_src = expected_src[:, :-1]
+        expected_tgt = torch.LongTensor([[w1, w2, eos], [w1, w2, eos]])
+        generated_src = backtranslation_batch_result["net_input"]["src_tokens"]
+        tgt_tokens = backtranslation_batch_result["target"]
+
+        self.assertTensorEqual(expected_src, generated_src)
+        self.assertTensorEqual(expected_tgt, tgt_tokens)
+
+    def test_backtranslation_dataset_no_eos_in_output_src(self):
+        self._backtranslation_dataset_helper(
+            remove_eos_from_input_src=False,
+            remove_eos_from_output_src=True,
+        )
+
+    def test_backtranslation_dataset_with_eos_in_output_src(self):
+        self._backtranslation_dataset_helper(
+            remove_eos_from_input_src=False,
+            remove_eos_from_output_src=False,
+        )
+
+    def test_backtranslation_dataset_no_eos_in_input_src(self):
+        self._backtranslation_dataset_helper(
+            remove_eos_from_input_src=True,
+            remove_eos_from_output_src=False,
+        )
+
+    def assertTensorEqual(self, t1, t2):
+        self.assertEqual(t1.size(), t2.size(), "size mismatch")
+        self.assertEqual(t1.ne(t2).long().sum(), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/fairseq-0.10.2/tests/test_constraints.py b/fairseq-0.10.2/tests/test_constraints.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c37f7e1fb26d8ea5349fedd3a60f566d09cf598
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_constraints.py
@@ -0,0 +1,269 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import sys
+import unittest
+
+import torch
+from fairseq.token_generation_constraints import *
+
+
+def tensorize(constraints: List[List[int]]) -> List[torch.Tensor]:  # one 1-D tensor per constraint
+    return [torch.tensor(x) for x in constraints]
+
+
+class TestHelperRoutines(unittest.TestCase):
+    def setUp(self):
+        self.examples = [
+            ([[]], torch.tensor([[0]])),
+            ([[], []], torch.tensor([[0], [0]])),
+            ([[torch.tensor([1, 2])], []], torch.tensor([[1, 1, 2, 0], [0, 0, 0, 0]])),
+            (
+                [
+                    [
+                        torch.tensor([3, 1, 2]),
+                        torch.tensor([3]),
+                        torch.tensor([4, 5, 6, 7]),
+                    ],
+                    [],
+                    [torch.tensor([1, 8, 9, 10, 1, 4, 11, 12])],
+                ],
+                torch.tensor(
+                    [
+                        [3, 3, 1, 2, 0, 3, 0, 4, 5, 6, 7, 0],
+                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                        [1, 1, 8, 9, 10, 1, 4, 11, 12, 0, 0, 0],
+                    ]
+                ),
+            ),
+        ]
+
+    def test_packing(self):
+        """Ensures the list of lists of tensors gets packed correctly."""
+        for batch_constraints, expected_tensor in self.examples:
+            packed = pack_constraints(batch_constraints)
+            assert torch.equal(packed, expected_tensor)
+
+
+class TestUnorderedConstraintState(unittest.TestCase):
+    def setUp(self):
+        # Tuples of (constraint set, expected printed graph, token counts per node)
+        self.examples = [
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
+                {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
+            ),
+            ([], "[None].False#0", {}),
+            (tensorize([[0]]), "([None].False#1 [0].True#1)", {0: 1}),
+            (
+                tensorize([[100000, 1, 2, 3, 4, 5]]),
+                "([None].False#1 ([100000].False#1 ([1].False#1 ([2].False#1 ([3].False#1 ([4].False#1 [5].True#1))))))",
+                {100000: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1},
+            ),
+            (
+                tensorize([[1, 2], [1, 2]]),
+                "([None].False#2 ([1].False#2 [2].True#2))",
+                {1: 2, 2: 2},
+            ),
+            (
+                tensorize([[1, 2], [3, 4]]),
+                "([None].False#2 ([1].False#1 [2].True#1) ([3].False#1 [4].True#1))",
+                {1: 1, 2: 1, 3: 1, 4: 1},
+            ),
+        ]
+
+        self.sequences = [
+            (
+                self.examples[0][0],
+                [],
+                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
+            ),
+            (
+                self.examples[0][0],
+                [1, 2],
+                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
+            ),
+            (
+                self.examples[0][0],
+                [1, 2, 94],
+                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
+            ),
+            (
+                self.examples[0][0],
+                [1, 3, 999, 1, 4],
+                {"bank": 4, "num_completed": 2, "finished": False, "is_root": False},
+            ),
+            (
+                self.examples[0][0],
+                [1, 3, 999, 1, 4, 999],
+                {"bank": 4, "num_completed": 2, "finished": False, "is_root": True},
+            ),
+            (
+                self.examples[0][0],
+                [4, 5, 6, 8],
+                {"bank": 2, "num_completed": 1, "finished": False, "is_root": True},
+            ),
+            (
+                self.examples[0][0],
+                # Tricky, because in last three, goes down [1->4] branch, could miss [1] and [4->5]
+                # [[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]],
+                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
+                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
+            ),
+            (
+                self.examples[0][0],
+                [1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
+                {"bank": 14, "num_completed": 6, "finished": True, "is_root": True},
+            ),
+            (
+                tensorize([[1], [2, 3]]),
+                # Should not be able to get credit for entering 1 a second time
+                [1, 1],
+                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
+            ),
+            (
+                self.examples[4][0],
+                [1, 2, 1, 2],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
+            ),
+            (
+                self.examples[4][0],
+                [1, 2, 1, 2, 1],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
+            ),
+            (
+                self.examples[5][0],
+                [1, 2, 3, 4, 5],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
+            ),
+        ]
+
+    def test_graphs(self):
+        """
+        Test whether unordered graph systems are created correctly.
+        """
+        for example in self.examples:
+            constraints, expected, gold_counts = example
+            c = ConstraintNode.create(constraints)
+            assert (
+                ConstraintNode.print_graph(c) == expected
+            ), f"got {ConstraintNode.print_graph(c)}, expected {expected}"
+            assert (
+                c.token_counts() == gold_counts
+            ), f"{c} got {c.token_counts()} wanted {gold_counts}"
+
+    def test_next_tokens(self):
+        """
+        Tests that the set of next tokens is correct.
+        """
+        for example in self.examples:
+            constraints, expected, gold_counts = example
+            root = ConstraintNode.create(constraints)
+
+            root_tokens = set(root.children.keys())
+            for sequence in constraints:
+                state = UnorderedConstraintState(root)
+                for token in sequence:
+                    all_tokens = root_tokens.union(state.node.children.keys())
+                    assert (
+                        all_tokens == state.next_tokens()
+                    ), f"ALL {all_tokens} NEXT {state.next_tokens()}"
+                    state = state.advance(token)
+
+    def test_sequences(self):
+        for constraints, tokens, expected in self.sequences:
+            state = UnorderedConstraintState.create(pack_constraints([constraints])[0])
+            for token in tokens:
+                state = state.advance(token)
+            result = {}
+            for attr in expected.keys():
+                result[attr] = getattr(state, attr)
+
+            assert (
+                result == expected
+            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
+
+
+class TestOrderedConstraintState(unittest.TestCase):
+    def setUp(self):
+        self.sequences = [
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [],
+                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2],
+                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2, 94],
+                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 3, 999, 1, 4],
+                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2, 3, 999, 999],
+                {"bank": 3, "num_completed": 1, "finished": False, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2, 3, 77, 1, 3, 1],
+                {"bank": 6, "num_completed": 2, "finished": False, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
+                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
+                [1, 2, 999, 1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
+                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
+            ),
+            (
+                tensorize([[1], [2, 3]]),
+                [1, 1],
+                {"bank": 1, "num_completed": 1, "finished": False, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2], [1, 2]]),
+                [1, 2, 1, 2],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2], [1, 2]]),
+                [1, 2, 1, 2, 1],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
+            ),
+            (
+                tensorize([[1, 2], [3, 4]]),
+                [1, 2, 3, 4, 5],
+                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
+            ),
+        ]
+
+    def test_sequences(self):
+        for i, (constraints, tokens, expected) in enumerate(self.sequences):
+            state = OrderedConstraintState.create(pack_constraints([constraints])[0])
+            for token in tokens:
+                state = state.advance(token)
+            result = {}
+            for attr in expected.keys():
+                result[attr] = getattr(state, attr)
+            assert (
+                result == expected
+            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/fairseq-0.10.2/tests/test_convtbc.py b/fairseq-0.10.2/tests/test_convtbc.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a3c9b91e70f597ab77b9b01459cc429db5d7956
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_convtbc.py
@@ -0,0 +1,54 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+import torch.nn as nn
+from fairseq.modules import ConvTBC
+
+
+class TestConvTBC(unittest.TestCase):
+    def test_convtbc(self):
+        # ksz, in_channels, out_channels
+        conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1)
+        # out_channels, in_channels, ksz
+        conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1)
+
+        conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2))
+        conv_tbc.bias.data.copy_(conv1d.bias.data)
+
+        input_tbc = torch.randn(7, 2, 4, requires_grad=True)
+        input1d = input_tbc.data.transpose(0, 1).transpose(1, 2)
+        input1d.requires_grad = True
+
+        output_tbc = conv_tbc(input_tbc)
+        output1d = conv1d(input1d)
+
+        self.assertAlmostEqual(
+            output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data
+        )
+
+        grad_tbc = torch.randn(output_tbc.size())
+        grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous()
+
+        output_tbc.backward(grad_tbc)
+        output1d.backward(grad1d)
+
+        self.assertAlmostEqual(
+            conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data
+        )
+        self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data)
+        self.assertAlmostEqual(
+            input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data
+        )
+
+    def assertAlmostEqual(self, t1, t2):
+        self.assertEqual(t1.size(), t2.size(), "size mismatch")
+        self.assertLess((t1 - t2).abs().max(), 1e-4)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/fairseq-0.10.2/tests/test_inference_dropout.py b/fairseq-0.10.2/tests/test_inference_dropout.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd5edd43d6a6f1fe06f8439cb9cb9a565e8a1074
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_inference_dropout.py
@@ -0,0 +1,66 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import unittest
+
+from fairseq.models.transformer import TransformerModel
+from tests.test_sequence_generator import get_dummy_task_and_parser
+
+
+class TestInferenceDropout(unittest.TestCase):
+    def setUp(self):
+        self.task, self.parser = get_dummy_task_and_parser()
+        TransformerModel.add_args(self.parser)
+        self.args = self.parser.parse_args([])
+        self.args.encoder_layers = 2
+        self.args.decoder_layers = 1
+        logging.disable(logging.CRITICAL)
+
+    def tearDown(self):
+        logging.disable(logging.NOTSET)
+
+    def test_sets_inference_dropout_to_true(self):
+        self.args.retain_dropout = True
+        self.transformer_model = TransformerModel.build_model(self.args, self.task)
+        self.transformer_model.prepare_for_inference_(self.args)
+        assert self.transformer_model.encoder.dropout_module.apply_during_inference
+        assert self.transformer_model.decoder.dropout_module.apply_during_inference
+        for layer in self.transformer_model.encoder.layers:
+            assert layer.dropout_module.apply_during_inference
+
+    def test_inference_dropout_false_by_default(self):
+        self.transformer_model = TransformerModel.build_model(self.args, self.task)
+        self.transformer_model.prepare_for_inference_(self.args)
+        assert not self.transformer_model.encoder.dropout_module.apply_during_inference
+        assert not self.transformer_model.decoder.dropout_module.apply_during_inference
+        for layer in self.transformer_model.encoder.layers:
+            assert not layer.dropout_module.apply_during_inference
+        for layer in self.transformer_model.decoder.layers:
+            assert not layer.dropout_module.apply_during_inference
+
+    def test_applies_training_mode(self):
+        self.transformer_model = TransformerModel.build_model(self.args, self.task)
+        assert self.transformer_model.encoder.dropout_module.training
+        for layer in self.transformer_model.encoder.layers:
+            assert layer.dropout_module.training
+
+        self.transformer_model.eval()
+        assert not self.transformer_model.decoder.dropout_module.training
+        for layer in self.transformer_model.encoder.layers:
+            assert not layer.dropout_module.training
+
+    def test_retain_modules(self):
+        self.args.retain_dropout = True
+        self.args.retain_dropout_modules = [
+            "TransformerEncoder",
+            "TransformerEncoderLayer",
+        ]
+        self.transformer_model = TransformerModel.build_model(self.args, self.task)
+        self.transformer_model.prepare_for_inference_(self.args)
+        assert self.transformer_model.encoder.dropout_module.apply_during_inference
+        assert not self.transformer_model.decoder.dropout_module.apply_during_inference
+        for layer in self.transformer_model.decoder.layers:
+            assert not layer.dropout_module.apply_during_inference
diff --git a/fairseq-0.10.2/tests/test_metrics.py b/fairseq-0.10.2/tests/test_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..2de6969cf4445bc6cda44dacf6de765ea30d5f5b
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_metrics.py
@@ -0,0 +1,77 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+import uuid
+
+from fairseq import metrics
+
+
+class TestMetrics(unittest.TestCase):  # nesting and naming behaviour of metrics.aggregate() contexts
+    def test_nesting(self):  # the outer context is expected to see the inner scalar as well
+        with metrics.aggregate() as a:
+            metrics.log_scalar("loss", 1)
+            with metrics.aggregate() as b:
+                metrics.log_scalar("loss", 2)
+
+        self.assertEqual(a.get_smoothed_values()["loss"], 1.5)  # mean of 1 and 2
+        self.assertEqual(b.get_smoothed_values()["loss"], 2)
+
+    def test_new_root(self):  # new_root=True is expected to hide the inner scalar from the outer context
+        with metrics.aggregate() as a:
+            metrics.log_scalar("loss", 1)
+            with metrics.aggregate(new_root=True) as b:
+                metrics.log_scalar("loss", 2)
+
+        self.assertEqual(a.get_smoothed_values()["loss"], 1)  # only the scalar logged directly under `a`
+        self.assertEqual(b.get_smoothed_values()["loss"], 2)
+
+    def test_nested_new_root(self):  # four levels, with a new root at levels 2 and 4
+        with metrics.aggregate() as layer1:
+            metrics.log_scalar("loss", 1)
+            with metrics.aggregate(new_root=True) as layer2:
+                metrics.log_scalar("loss", 2)
+                with metrics.aggregate() as layer3:
+                    metrics.log_scalar("loss", 3)
+                    with metrics.aggregate(new_root=True) as layer4:
+                        metrics.log_scalar("loss", 4)
+            metrics.log_scalar("loss", 1.5)
+
+        self.assertEqual(layer4.get_smoothed_values()["loss"], 4)
+        self.assertEqual(layer3.get_smoothed_values()["loss"], 3)
+        self.assertEqual(layer2.get_smoothed_values()["loss"], 2.5)  # mean of 2 and 3
+        self.assertEqual(layer1.get_smoothed_values()["loss"], 1.25)  # mean of 1 and 1.5
+
+    def test_named(self):  # re-entering a named aggregator is expected to keep accumulating
+        name = str(uuid.uuid4())  # unique name, so no other user of the metrics module shares it
+        metrics.reset_meters(name)
+
+        with metrics.aggregate(name):
+            metrics.log_scalar("loss", 1)
+
+        metrics.log_scalar("loss", 3)  # logged outside the named context
+
+        with metrics.aggregate(name):
+            metrics.log_scalar("loss", 2)
+
+        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 1.5)  # mean of 1 and 2; the 3 is not included
+
+    def test_nested_duplicate_names(self):  # same name entered again inside itself
+        name = str(uuid.uuid4())
+        metrics.reset_meters(name)
+
+        with metrics.aggregate(name):
+            metrics.log_scalar("loss", 1)
+            with metrics.aggregate() as other:
+                with metrics.aggregate(name):
+                    metrics.log_scalar("loss", 2)
+            metrics.log_scalar("loss", 6)
+
+        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 3)  # mean of 1, 2 and 6: the 2 counts once
+        self.assertEqual(other.get_smoothed_values()["loss"], 2)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/fairseq-0.10.2/tests/test_sequence_scorer.py b/fairseq-0.10.2/tests/test_sequence_scorer.py
new file mode 100644
index 0000000000000000000000000000000000000000..42f9447b599bcd7a9913aec37d94ea5078ff43a3
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_sequence_scorer.py
@@ -0,0 +1,120 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import unittest
+
+import tests.utils as test_utils
+import torch
+from fairseq.sequence_scorer import SequenceScorer
+
+
+class TestSequenceScorer(unittest.TestCase):  # scores fixed targets against hand-written per-step probabilities
+    def test_sequence_scorer(self):  # tokens and positional scores of each hypothesis must match the tables below
+        # construct dummy dictionary
+        d = test_utils.dummy_dictionary(vocab_size=2)
+        self.assertEqual(d.pad(), 1)
+        self.assertEqual(d.eos(), 2)
+        self.assertEqual(d.unk(), 3)
+        eos = d.eos()
+        w1 = 4  # ids of the two dummy words
+        w2 = 5
+
+        # construct dataloader
+        data = [
+            {
+                "source": torch.LongTensor([w1, w2, eos]),
+                "target": torch.LongTensor([w1, w2, w1, eos]),
+            },
+            {
+                "source": torch.LongTensor([w2, eos]),
+                "target": torch.LongTensor([w2, w1, eos]),
+            },
+            {
+                "source": torch.LongTensor([w2, eos]),
+                "target": torch.LongTensor([w2, eos]),
+            },
+        ]
+        data_itr = test_utils.dummy_dataloader(data)
+
+        # specify expected output probabilities
+        args = argparse.Namespace()
+        unk = 0.0
+        args.beam_probs = [  # one tensor per step, one row per sentence
+            # step 0:
+            torch.FloatTensor(
+                [
+                    # eos      w1   w2
+                    [0.0, unk, 0.6, 0.4],  # sentence 1
+                    [0.0, unk, 0.4, 0.6],  # sentence 2
+                    [0.0, unk, 0.7, 0.3],  # sentence 3
+                ]
+            ),
+            # step 1:
+            torch.FloatTensor(
+                [
+                    # eos      w1   w2
+                    [0.0, unk, 0.2, 0.7],  # sentence 1
+                    [0.0, unk, 0.8, 0.2],  # sentence 2
+                    [0.7, unk, 0.1, 0.2],  # sentence 3
+                ]
+            ),
+            # step 2:
+            torch.FloatTensor(
+                [
+                    # eos       w1    w2
+                    [0.10, unk, 0.50, 0.4],  # sentence 1
+                    [0.15, unk, 0.15, 0.7],  # sentence 2
+                    [0.00, unk, 0.00, 0.0],  # sentence 3
+                ]
+            ),
+            # step 3:
+            torch.FloatTensor(
+                [
+                    # eos      w1    w2
+                    [0.9, unk, 0.05, 0.05],  # sentence 1
+                    [0.0, unk, 0.00, 0.0],  # sentence 2
+                    [0.0, unk, 0.00, 0.0],  # sentence 3
+                ]
+            ),
+        ]
+        expected_scores = [  # table entry of each target token at its own step
+            [0.6, 0.7, 0.5, 0.9],  # sentence 1
+            [0.6, 0.8, 0.15],  # sentence 2
+            [0.3, 0.7],  # sentence 3
+        ]
+
+        task = test_utils.TestTranslationTask.setup_task(args, d, d)
+        model = task.build_model(args)
+        scorer = SequenceScorer(task.target_dictionary)
+        for sample in data_itr:
+            hypos = task.inference_step(scorer, [model], sample)
+            for id, hypos_id in zip(sample["id"].tolist(), hypos):  # NOTE(review): `id` shadows the builtin
+                self.assertHypoTokens(hypos_id[0], data[id]["target"])
+                self.assertHypoScore(hypos_id[0], expected_scores[id])
+
+    def assertHypoTokens(self, hypo, tokens):  # hypothesis tokens must equal `tokens` exactly
+        self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens))
+
+    def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0):  # checks positional scores and the total score
+        pos_scores = torch.FloatTensor(pos_probs).log()  # the hypothesis is compared against log-probabilities
+        self.assertAlmostEqual(hypo["positional_scores"], pos_scores)
+        self.assertEqual(pos_scores.numel(), hypo["tokens"].numel())
+        score = pos_scores.sum()
+        if normalized:
+            score /= pos_scores.numel() ** lenpen  # length normalization with penalty exponent `lenpen`
+        self.assertLess(abs(score - hypo["score"]), 1e-6)
+
+    def assertAlmostEqual(self, t1, t2):  # tensor version: equal sizes, max abs difference < 1e-4
+        self.assertEqual(t1.size(), t2.size(), "size mismatch")
+        self.assertLess((t1 - t2).abs().max(), 1e-4)
+
+    def assertTensorEqual(self, t1, t2):  # equal sizes and no differing element
+        self.assertEqual(t1.size(), t2.size(), "size mismatch")
+        self.assertEqual(t1.ne(t2).long().sum(), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/fairseq-0.10.2/tests/test_token_block_dataset.py b/fairseq-0.10.2/tests/test_token_block_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea315b4e67a6176feb3e35c468ca1179b4e0e3c4
--- /dev/null
+++ b/fairseq-0.10.2/tests/test_token_block_dataset.py
@@ -0,0 +1,79 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import tests.utils as test_utils
+import torch
+from fairseq.data import TokenBlockDataset
+
+
+class TestTokenBlockDataset(unittest.TestCase):
+    def _build_dataset(self, data, **kwargs):
+        sizes = [len(x) for x in data]
+        underlying_ds = test_utils.TestDataset(data)
+        return TokenBlockDataset(underlying_ds, sizes, **kwargs)
+
+    def test_eos_break_mode(self):
+        data = [
+            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
+            torch.tensor([1], dtype=torch.long),
+            torch.tensor([8, 7, 6, 1], dtype=torch.long),
+        ]
+        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
+        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
+        self.assertEqual(ds[1].tolist(), [1])
+        self.assertEqual(ds[2].tolist(), [8, 7, 6, 1])
+
+        data = [
+            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
+            torch.tensor([8, 7, 6, 1], dtype=torch.long),
+            torch.tensor([1], dtype=torch.long),
+        ]
+        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
+        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
+        self.assertEqual(ds[1].tolist(), [8, 7, 6, 1])
+        self.assertEqual(ds[2].tolist(), [1])
+
+    def test_block_break_mode(self):
+        data = [
+            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
+            torch.tensor([8, 7, 6, 1], dtype=torch.long),
+            torch.tensor([9, 1], dtype=torch.long),
+        ]
+        ds = self._build_dataset(data, block_size=3, pad=0, eos=1, break_mode="none")
+        self.assertEqual(ds[0].tolist(), [5, 4, 3])
+        self.assertEqual(ds[1].tolist(), [2, 1, 8])
+        self.assertEqual(ds[2].tolist(), [7, 6, 1])
+        self.assertEqual(ds[3].tolist(), [9, 1])
+
+    def test_complete_break_mode(self):
+        data = [
+            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
+            torch.tensor([8, 7, 6, 1], dtype=torch.long),
+            torch.tensor([9, 1], dtype=torch.long),
+        ]
+        ds = self._build_dataset(
+            data, block_size=6, pad=0, eos=1, break_mode="complete"
+        )
+        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
+        self.assertEqual(ds[1].tolist(), [8, 7, 6, 1, 9, 1])
+
+        data = [
+            torch.tensor([4, 3, 2, 1], dtype=torch.long),
+            torch.tensor([5, 1], dtype=torch.long),
+            torch.tensor([1], dtype=torch.long),
+            torch.tensor([6, 1], dtype=torch.long),
+        ]
+        ds = self._build_dataset(
+            data, block_size=3, pad=0, eos=1, break_mode="complete"
+        )
+        self.assertEqual(ds[0].tolist(), [4, 3, 2, 1])
+        self.assertEqual(ds[1].tolist(), [5, 1, 1])
+        self.assertEqual(ds[2].tolist(), [6, 1])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/mosesdecoder/.beautify-ignore b/mosesdecoder/.beautify-ignore
new file mode 100644
index 0000000000000000000000000000000000000000..b7eb51a205542217c7b265ec47c077d9d3f89e95
--- /dev/null
+++ b/mosesdecoder/.beautify-ignore
@@ -0,0 +1,38 @@
+# Files and directories that beautify.py should not clean up.
+#
+# This file is not as advanced as, say, .gitignore.  It only supports files
+# and directory paths relative to the project root, one per line, no globs,
+# no quotes.
+#
+# Leading and trailing whitespace is stripped from filenames, but internal
+# whitespace is preserved.
+#
+# Lines starting with a hash mark, such as this one, are comments.  The hash
+# mark must be the first character on the line.  Blank lines are ignored.
+#
+# The .beautify-ignore file must be encoded in UTF-8.
+
+boost
+contrib
+irstlm
+jam-files
+lm
+mingw/MosesGUI/icons_rc.py
+mingw/MosesGUI/Ui_credits.py
+mingw/MosesGUI/Ui_mainWindow.py
+moses/TranslationModel/UG
+moses/server
+moses/parameters
+moses/thread_safe_container.h
+phrase-extract/pcfg-common
+phrase-extract/syntax-common
+randlm
+# Filename suffixes in here are language codes, so e.g. ".pl" means
+# Polish, not Perl.
+scripts/share/nonbreaking_prefixes
+search
+srilm
+util
+xmlrpc-c
+.git
+util/ug_cache_with_timeout.h
diff --git a/mosesdecoder/.travis.yml b/mosesdecoder/.travis.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c80b60de57a5e588801e429aebaeb2352194f663
--- /dev/null
+++ b/mosesdecoder/.travis.yml
@@ -0,0 +1,24 @@
+sudo: false
+dist: trusty
+language: c
+compiler: gcc
+env:
+  matrix: 
+addons:
+  apt:
+    sources:
+      - ubuntu-toolchain-r-test
+    packages:
+      - subversion
+      - automake
+      - libtool
+      - zlib1g-dev
+      - libbz2-dev
+      - liblzma-dev
+      - libboost-all-dev
+      - libgoogle-perftools-dev
+      - libxmlrpc-c++.*-dev
+      - cmake
+      - csh
+script:
+- ./bjam -j4
diff --git a/mosesdecoder/azure-pipelines.yml b/mosesdecoder/azure-pipelines.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fddd0faea1e935437ee9d67dfb4bd6414f86b636
--- /dev/null
+++ b/mosesdecoder/azure-pipelines.yml
@@ -0,0 +1,100 @@
+# Starter pipeline
+# Start with a minimal pipeline that you can customize to build and deploy your code.
+# Add steps that build, run tests, deploy, and more:
+# https://aka.ms/yaml
+
+trigger:
+- master
+
+pool:
+  #vmImage: 'ubuntu-latest'
+  vmImage: 'ubuntu-16.04'
+
+steps:
+
+- script: |
+    echo Printing some environment information
+    echo HOME: $HOME
+    echo
+    echo UBUNTU VERSION: 
+    cat /etc/lsb-release
+    echo
+    echo CPU INFO
+    cat /proc/cpuinfo
+    echo 
+    echo MEM INFO
+    cat /proc/meminfo
+    echo
+    echo DISK INFO
+    df -h
+    echo 
+    echo PWD: $PWD
+    echo
+    ls
+  displayName: 'Printing some environment information'
+
+
+## Installation commands for Ubuntu 
+- script: |
+    sudo apt-get install  \
+      g++  \
+      git  \
+      subversion \
+      automake \
+      libtool \
+      zlib1g-dev \
+      libicu-dev \
+      libboost-all-dev \
+      libssl-dev \
+      libbz2-dev \
+      liblzma-dev \
+      python-dev \
+      graphviz \
+      imagemagick \
+      make \
+      cmake \
+      libgoogle-perftools-dev \
+      autoconf \
+      doxygen 
+  displayName: 'Install Ubuntu packages'
+
+- script: |   
+    wget "https://sourceforge.net/projects/cmph/files/v2.0.2/cmph-2.0.2.tar.gz/download"
+    mv download  cmph-2.0.2.tar.gz  
+    tar xvzf cmph-2.0.2.tar.gz
+    cd cmph-2.0.2
+    ./configure --prefix=$PWD
+    make
+    make install
+    cd ..
+  displayName: 'Build and Install cmph'    
+
+- script: |   
+    wget "https://sourceforge.net/projects/xmlrpc-c/files/Xmlrpc-c%20Super%20Stable/1.51.06/xmlrpc-c-1.51.06.tgz/download"
+    mv download  xmlrpc-c-1.51.06.tgz
+    tar xvzf xmlrpc-c-1.51.06.tgz
+    cd xmlrpc-c-1.51.06
+    ./configure --prefix=$PWD
+    make
+    make install
+    sudo ldconfig
+    cd ..
+  displayName: 'Build and Install xmlrpc-c'    
+
+- script: |   
+    ./bjam \
+      --with-cmph=$PWD/cmph-2.0.2 \
+      --with-xmlrpc-c=$PWD/xmlrpc-c-1.51.06 \
+      -j2
+  displayName: 'Build Moses'    
+
+# - script: |   
+#     ./bjam \
+#       -j2
+#   displayName: 'Build Moses'  
+
+# - task: ComponentGovernanceComponentDetection@0
+#   inputs:
+#     scanType: 'Register'
+#     verbosity: 'Verbose'
+#     alertWarningLevel: 'High' 
\ No newline at end of file
diff --git a/mosesdecoder/biconcor/Jamfile b/mosesdecoder/biconcor/Jamfile
new file mode 100644
index 0000000000000000000000000000000000000000..83a73800072c01fb80904e897a515191ebc62fb6
--- /dev/null
+++ b/mosesdecoder/biconcor/Jamfile
@@ -0,0 +1,2 @@
+exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
+exe phrase-lookup : Vocabulary.cpp SuffixArray.cpp phrase-lookup.cpp ;
diff --git a/mosesdecoder/biconcor/PhrasePairCollection.cpp b/mosesdecoder/biconcor/PhrasePairCollection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..540aaac6f424d47029c5ddb40cdd7a2aea1c094f
--- /dev/null
+++ b/mosesdecoder/biconcor/PhrasePairCollection.cpp
@@ -0,0 +1,209 @@
+#include "PhrasePairCollection.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+
+#include "Vocabulary.h"
+#include "SuffixArray.h"
+#include "TargetCorpus.h"
+#include "Alignment.h"
+#include "PhrasePair.h"
+#include "Mismatch.h"
+
+using namespace std;
+
+PhrasePairCollection::PhrasePairCollection( SuffixArray *sa, TargetCorpus *tc, Alignment *a, int max_translation, int max_example )
+  :m_suffixArray(sa)  // the three corpus pointers are stored as passed in
+  ,m_targetCorpus(tc)
+  ,m_alignment(a)
+  ,m_size(0)  // number of phrase pairs collected so far
+  ,m_max_lookup(10000)          // above this many source occurrences, GetCollection() only samples them
+  ,m_max_translation(max_translation)    // max number of distinct translations printed
+  ,m_max_example(max_example) // max number of examples printed for each distinct translation
+{}
+
+PhrasePairCollection::~PhrasePairCollection()
+{
+  // Every PhrasePair and Mismatch held here was allocated with new in
+  // GetCollection() and is stored nowhere else, so it is released here.
+  for (size_t i = 0; i < m_collection.size(); i++)
+    for (size_t j = 0; j < m_collection[i].size(); j++)
+      delete m_collection[i][j];
+  for (size_t i = 0; i < m_mismatch.size(); i++)
+    delete m_mismatch[i];
+  for (size_t i = 0; i < m_unaligned.size(); i++)
+    delete m_unaligned[i];
+}
+
+// Looks up sourceString in the suffix array and files each examined occurrence
+// under m_collection (grouped by target phrase), m_unaligned or m_mismatch.
+// Returns the number of occurrences examined, or 0 if nothing matched.
+int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
+{
+  INDEX first_match, last_match;
+  if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {
+    return 0;
+  }
+
+  INDEX found = last_match - first_match +1;
+
+  // maps a target phrase to the position of its group in m_collection
+  map< vector< WORD_ID >, INDEX > index;
+  int real_count = 0;
+  for( INDEX i=first_match; i<=last_match; i++ ) {
+    int position = m_suffixArray->GetPosition( i );
+    int source_start = m_suffixArray->GetWordInSentence( position );
+    int source_end = source_start + sourceString.size()-1;
+    INDEX sentence_id = m_suffixArray->GetSentence( position );
+    int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
+    int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
+    int target_start, target_end, pre_null, post_null;
+    if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
+      // with null_boundary_words fixed to false only pre == 0 / post == 0 runs
+      bool null_boundary_words = false;
+      for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
+        for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
+          vector< WORD_ID > targetString;
+          for (int target = target_start - pre; target <= target_end + post; target++) {
+            targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
+          }
+          PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);
+          if (index.find( targetString ) == index.end()) {
+            index[targetString] = m_collection.size();
+            vector< PhrasePair* > emptyVector;
+            m_collection.push_back( emptyVector );
+          }
+          m_collection[ index[targetString] ].push_back( phrasePair );
+          m_size++;
+        }
+      }
+    } else {
+      // PhraseAlignment() failed: keep the occurrence as unaligned or mismatched
+      Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
+      if (mismatch->Unaligned())
+        m_unaligned.push_back( mismatch );
+      else
+        m_mismatch.push_back( mismatch );
+    }
+
+    // more matches than m_max_lookup: jump ahead so that only about
+    // m_max_lookup occurrences, spread over the whole range, are examined
+    if (found > (INDEX)m_max_lookup) {
+      i += found/m_max_lookup-1;
+    }
+    real_count++;
+  }
+  sort(m_collection.begin(), m_collection.end(), CompareBySize());
+  return real_count;
+}
+
+void PhrasePairCollection::Print(bool pretty) const  // plain-text dump to cout; `pretty` selects PrintPretty() over Print() per example
+{
+  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
+  int i=0;  // translations printed so far, capped by m_max_translation
+  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {
+    (*(ppWithSameTarget->begin()))->PrintTarget( &cout );  // PrintTarget() of the group's first pair, followed by the group size
+    int count = ppWithSameTarget->size();
+    cout << "(" << count << ")" << endl;
+    vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();
+    for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {  // at most m_max_example examples per group
+      if (pretty) {
+        (*p)->PrintPretty( &cout, 100 );
+      } else {
+        (*p)->Print( &cout );
+      }
+      if (ppWithSameTarget->size() > m_max_example) {
+        p += ppWithSameTarget->size()/m_max_example-1;  // skip ahead so the examples are spread over the group
+      }
+    }
+  }
+}
+
+void PhrasePairCollection::PrintHTML() const
+{
+  // Writes the collection to cout as HTML: one table per distinct translation
+  // (at most m_max_translation), then the unaligned and mismatched occurrences.
+  int pp_target = 0;
+  bool singleton = false;
+  // loop over all translations
+  vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
+  for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {
+
+    int count = ppWithSameTarget->size();
+    if (!singleton) {
+      if (count == 1) {
+        // first group of size 1: from here on everything goes into one shared table
+        singleton = true;
+        cout << "<p class=\"pp_singleton_header\">singleton"
+             << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
+             << (m_collection.end() - ppWithSameTarget)
+             << "/" << m_size << ")</p>";
+      } else {
+        cout << "<p class=\"pp_target_header\">";
+        (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
+        cout << " (" << count << "/" << m_size << ")" << endl;
+        cout << "<p><div id=\"pp_" << pp_target << "\">";
+      }
+      cout << "<table align=\"center\">";
+    }
+
+    // Stride between printed examples. Indexing with it replaces the former
+    // iterator arithmetic, which could step beyond end() for small m_max_example.
+    const int stride = (m_max_example > 0 && count > m_max_example) ? count/m_max_example : 1;
+    // loop over all sentences where translation occurs
+    int pp=0;
+    int i=0;
+    for( ; i<10 && pp<count; pp+=stride, i++ ) {
+      (*ppWithSameTarget)[pp]->PrintClippedHTML( &cout, 160 );
+    }
+    if (i == 10 && pp < count) {
+      // extended table
+      cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
+      cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\">";
+      cout << "<table align=\"center\">";
+      for(i=0, pp=0; i<m_max_example && pp<count; pp+=stride, i++ ) {
+        (*ppWithSameTarget)[pp]->PrintClippedHTML( &cout, 160 );
+      }
+    }
+    if (!singleton) cout << "</table></div>\n";
+
+    if (!singleton && pp_target == 9) {
+      // translations beyond the tenth start out hidden behind a "(more)" toggle
+      cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
+      cout << "<p class=\"pp_target_header\">(more)</p></div>";
+      cout << "<div id=\"pp_additional\" style=\"display:none;\">";
+    }
+  }
+  if (singleton) cout << "</table></div>\n";
+  else if (pp_target > 9)	cout << "</div>";
+
+  // Each of the two lists below shows at most m_max_example/3 entries; the
+  // clamp to 1 keeps the step computation from dividing by zero.
+  size_t max_mismatch = max<size_t>( 1, m_max_example/3 );
+  // unaligned phrases
+  if (m_unaligned.size() > 0) {
+    cout << "<p class=\"pp_singleton_header\">unaligned"
+         << " (" << (m_unaligned.size()) << ")</p>";
+    cout << "<table align=\"center\">";
+    int step_size = 1;
+    if (m_unaligned.size() > max_mismatch)
+      step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
+    for(size_t i=0; i<m_unaligned.size(); i+=step_size)
+      m_unaligned[i]->PrintClippedHTML( &cout, 160 );
+    cout << "</table>";
+  }
+
+  // mismatched phrases
+  if (m_mismatch.size() > 0) {
+    cout << "<p class=\"pp_singleton_header\">mismatched"
+         << " (" << (m_mismatch.size()) << ")</p>";
+    cout << "<table align=\"center\">";
+    int step_size = 1;
+    if (m_mismatch.size() > max_mismatch)
+      step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
+    for(size_t i=0; i<m_mismatch.size(); i+=step_size)
+      m_mismatch[i]->PrintClippedHTML( &cout, 160 );
+    cout << "</table>";
+  }
+}
diff --git a/mosesdecoder/biconcor/PhrasePairCollection.h b/mosesdecoder/biconcor/PhrasePairCollection.h
new file mode 100644
index 0000000000000000000000000000000000000000..e076eba9bbc1c1ae76f3c1f80c0887c7167a1f7e
--- /dev/null
+++ b/mosesdecoder/biconcor/PhrasePairCollection.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <vector>
+#include <string>
+
+class Alignment;
+class PhrasePair;
+class SuffixArray;
+class TargetCorpus;
+class Mismatch;
+
+class PhrasePairCollection  // occurrences of one source phrase, grouped by the target phrase they align to
+{
+public:
+  typedef unsigned int INDEX;
+
+private:
+  SuffixArray *m_suffixArray;  // the three corpus pointers are supplied through the constructor
+  TargetCorpus *m_targetCorpus;
+  Alignment *m_alignment;
+  std::vector<std::vector<PhrasePair*> > m_collection;  // one inner vector per distinct target phrase
+  std::vector< Mismatch* > m_mismatch, m_unaligned;  // occurrences without a phrase alignment, split by Mismatch::Unaligned()
+  int m_size;  // total number of phrase pairs stored in m_collection
+  int m_max_lookup;  // occurrence count above which GetCollection() samples instead of visiting all
+  int m_max_translation;  // cap on the number of translations printed
+  int m_max_example;  // cap on the number of examples printed per translation
+
+  // No copying allowed.
+  PhrasePairCollection(const PhrasePairCollection&);
+  void operator=(const PhrasePairCollection&);
+
+public:
+  PhrasePairCollection ( SuffixArray *, TargetCorpus *, Alignment *, int, int );  // last two: max translations, max examples
+  ~PhrasePairCollection ();
+
+  int GetCollection( const std::vector<std::string >& sourceString );  // fills the collection; returns the number of occurrences examined
+  void Print(bool pretty) const;  // plain-text output to cout
+  void PrintHTML() const;  // HTML output to cout
+};
+
+// sorting helper: orders groups of phrase pairs by descending size (largest group first)
+struct CompareBySize {
+  bool operator()(const std::vector<PhrasePair*>& a, const std::vector<PhrasePair*>& b ) const {
+    return a.size() > b.size();
+  }
+};
diff --git a/mosesdecoder/biconcor/SuffixArray.h b/mosesdecoder/biconcor/SuffixArray.h
new file mode 100644
index 0000000000000000000000000000000000000000..f20702e41f0e283a27ce4074f1f2f8ae08964d11
--- /dev/null
+++ b/mosesdecoder/biconcor/SuffixArray.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include "Vocabulary.h"
+
+class SuffixArray // Index built from a source corpus file (Create) or restored from disk (Load); answers phrase lookups.
+{
+public:
+  typedef unsigned int INDEX; // unsigned: a default argument of -1 below converts to the largest INDEX value
+
+private:
+  WORD_ID *m_array;          // word id per corpus position (read by GetWord)
+  INDEX *m_index;            // maps an index slot to a corpus position (read by GetPosition)
+  INDEX *m_buffer;
+  char *m_wordInSentence;    // per position, stored as char (read by GetWordInSentence)
+  INDEX *m_sentence;         // per position sentence id (read by GetSentence)
+  char *m_sentenceLength;    // per sentence id, stored as char (read by GetSentenceLength)
+  WORD_ID m_endOfSentence;
+  INDEX *m_document;
+  INDEX *m_documentName;     // per document, start offset into m_documentNameBuffer (see PrintDocumentName)
+  char *m_documentNameBuffer; // NUL-terminated document names, addressed through m_documentName
+  size_t m_documentNameLength;
+  size_t m_documentCount;
+  bool m_useDocument;        // switched on by UseDocument()
+  Vocabulary m_vcb;          // resolves WORD_ID values to WORD (see GetWord)
+  INDEX m_size;              // returned by GetSize()
+  INDEX m_sentenceCount;
+
+  // No copying allowed: copy constructor and assignment are declared private and not defined in this header.
+  SuffixArray(const SuffixArray&);
+  void operator=(const SuffixArray&);
+
+public:
+  SuffixArray();
+  ~SuffixArray();
+
+  void Create(const std::string& fileName ); // main() calls this with the source corpus file name
+  bool ProcessDocumentLine( const char* const, const size_t );
+  void Sort(INDEX start, INDEX end);
+  int CompareIndex( INDEX a, INDEX b ) const;
+  inline int CompareWord( WORD_ID a, WORD_ID b ) const; // NOTE(review): declared inline but not defined in this header
+  int Count( const std::vector< WORD > &phrase );
+  bool MinCount( const std::vector< WORD > &phrase, INDEX min );
+  bool Exists( const std::vector< WORD > &phrase );
+  int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 ); // search_end default wraps to the largest INDEX
+  int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 ); // NOTE(review): defaults are the reverse of FindMatches (start = largest INDEX, end = 0) - confirm intended
+  INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
+  INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
+  int Match( const std::vector< WORD > &phrase, INDEX index );
+  void List( INDEX start, INDEX end );
+  void PrintSentenceMatches( const std::vector< WORD > &phrase );
+  inline INDEX GetPosition( INDEX index ) const { // unchecked read of m_index
+    return m_index[ index ];
+  }
+  inline INDEX GetSentence( INDEX position ) const { // unchecked read of m_sentence
+    return m_sentence[position];
+  }
+  inline char GetWordInSentence( INDEX position ) const { // result is a char, so limited to char's range
+    return m_wordInSentence[position];
+  }
+  inline char GetSentenceLength( INDEX sentenceId ) const { // result is a char, so limited to char's range
+    return m_sentenceLength[sentenceId];
+  }
+  inline INDEX GetSize() const {
+    return m_size;
+  }
+  inline WORD GetWord( INDEX position ) const { // translates the stored word id back through the vocabulary
+    return m_vcb.GetWord( m_array[position] );
+  }
+  void UseDocument() { // only sets the flag; nothing else happens here
+    m_useDocument = true;
+  }
+  INDEX GetDocument( INDEX sentence ) const;
+  void PrintDocumentName( INDEX document ) { // writes the name to std::cout without a trailing newline
+    for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) { // copy characters up to the terminating NUL
+      std::cout << m_documentNameBuffer[ i ]; // NOTE(review): std::cout must come in via Vocabulary.h; no <iostream> include here
+    }
+  }
+  void Save(const std::string& fileName ) const;
+  void Load(const std::string& fileName );
+  void CheckAllocation(bool, const char *dataStructure) const;
+  bool Error( const char* message, const std::string& fileName) const;
+};
diff --git a/mosesdecoder/biconcor/biconcor.cpp b/mosesdecoder/biconcor/biconcor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb63e855d82e4b29a3ff3c7ada13fe7996a481cb
--- /dev/null
+++ b/mosesdecoder/biconcor/biconcor.cpp
@@ -0,0 +1,171 @@
+#include "SuffixArray.h"
+#include "TargetCorpus.h"
+#include "Alignment.h"
+#include "PhrasePairCollection.h"
+#include <getopt.h>
+#include "base64.h"
+
+using namespace std;
+
+int main(int argc, char* argv[]) // biconcor entry point: parse options, create or load the index, then answer queries
+{
+  // handle parameters (flags are plain ints that getopt cases switch to true)
+  string query;
+  string fileNameSuffix;
+  string fileNameSource;
+  string fileNameTarget = "";
+  string fileNameAlignment = "";
+  int loadFlag = false;
+  int saveFlag = false;
+  int createFlag = false;
+  int queryFlag = false;
+  int htmlFlag = false;   // output as HTML
+  int prettyFlag = false; // output readable on screen
+  int stdioFlag = false;  // receive requests from STDIN, respond to STDOUT
+  int max_translation = 20;
+  int max_example = 50;
+  string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create source-corpus]\n\t[--query string]\n\t[--target target-corpus]\n\t[--alignment file]\n\t[--translations count]\n\t[--examples count]\n\t[--html]\n\t[--stdio]\n";
+  while(1) {
+    static struct option long_options[] = {
+      {"load", required_argument, 0, 'l'},
+      {"save", required_argument, 0, 's'},
+      {"create", required_argument, 0, 'c'},
+      {"query", required_argument, 0, 'q'},
+      {"target", required_argument, 0, 't'},
+      {"alignment", required_argument, 0, 'a'},
+      {"html", no_argument, 0, 'h'},
+      {"pretty", no_argument, 0, 'p'},
+      {"stdio", no_argument, 0, 'i'},
+      {"translations", required_argument, 0, 'o'},
+      {"examples", required_argument, 0, 'e'},
+      {0, 0, 0, 0}
+    };
+    int option_index = 0;
+    int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:hpio:e:", long_options, &option_index); // -Q exists only as a short option
+    if (c == -1) break; // all options consumed
+    switch (c) {
+    case 'l':
+      fileNameSuffix = string(optarg);
+      loadFlag = true;
+      break;
+    case 't':
+      fileNameTarget = string(optarg);
+      break;
+    case 'a':
+      fileNameAlignment = string(optarg);
+      break;
+    case 's':
+      fileNameSuffix = string(optarg); // shares the variable with --load; the two are rejected together below
+      saveFlag = true;
+      break;
+    case 'c':
+      fileNameSource = string(optarg);
+      createFlag = true;
+      break;
+    case 'Q':
+      query = base64_decode(string(optarg)); // same as -q, but the argument is base64-encoded
+      queryFlag = true;
+      break;
+    case 'q':
+      query = string(optarg);
+      queryFlag = true;
+      break;
+    case 'o':
+      max_translation = atoi(optarg);
+      break;
+    case 'e':
+      max_example = atoi(optarg);
+      break;
+    case 'p':
+      prettyFlag = true;
+      break;
+    case 'h':
+      htmlFlag = true;
+      break;
+    case 'i':
+      stdioFlag = true;
+      break;
+    default:
+      cerr << info; // any option not handled above: show usage and give up
+      exit(1);
+    }
+  }
+  if (stdioFlag) {
+    queryFlag = true;
+  }
+
+  // check if parameter settings are legal
+  if (saveFlag && !createFlag) {
+    cerr << "error: cannot save without creating\n" << info;
+    exit(1);
+  }
+  if (saveFlag && loadFlag) {
+    cerr << "error: cannot load and save at the same time\n" << info;
+    exit(1);
+  }
+  if (!loadFlag && !createFlag) {
+    cerr << "error: neither load or create - i have no info!\n" << info;
+    exit(1);
+  }
+  if (createFlag && (fileNameTarget == "" || fileNameAlignment == "")) {
+    cerr << "error: i have no target corpus or alignment\n" << info;
+    exit(1);
+  }
+
+  // do your thing: build the three data structures from text, or load them from a saved model
+  SuffixArray suffixArray;
+  TargetCorpus targetCorpus;
+  Alignment alignment;
+  if (createFlag) {
+    cerr << "will create\n";
+    cerr << "source corpus is in " << fileNameSource << endl;
+    suffixArray.Create( fileNameSource );
+    cerr << "target corpus is in " << fileNameTarget << endl;
+    targetCorpus.Create( fileNameTarget );
+    cerr << "alignment is in " << fileNameAlignment << endl;
+    alignment.Create( fileNameAlignment );
+    if (saveFlag) {
+      cerr << "will save in " << fileNameSuffix << endl; // announced before writing, matching "will load from" below
+      suffixArray.Save( fileNameSuffix );
+      targetCorpus.Save( fileNameSuffix );
+      alignment.Save( fileNameSuffix );
+    }
+  }
+  if (loadFlag) {
+    cerr << "will load from " << fileNameSuffix << endl;
+    suffixArray.Load( fileNameSuffix );
+    targetCorpus.Load( fileNameSuffix );
+    alignment.Load( fileNameSuffix );
+  }
+  if (stdioFlag) {
+    cout << "-|||- BICONCOR START -|||-" << endl << flush; // handshake line for the process driving us over a pipe
+    while(true) {
+      string line; // one request per input line (named so it does not shadow the outer `query`)
+      if (!getline(cin, line)) { // stop on EOF or stream error; a final line without '\n' is still answered
+        return 0;
+      }
+      vector< string > queryString = alignment.Tokenize( line.c_str() );
+      PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
+      int total = ppCollection.GetCollection( queryString );
+      cout << "TOTAL: " << total << endl;
+      if (htmlFlag) {
+        ppCollection.PrintHTML();
+      } else {
+        ppCollection.Print(prettyFlag);
+      }
+      cout << "-|||- BICONCOR END -|||-" << endl << flush; // end-of-response marker, flushed so the reader is not left waiting
+    }
+  } else if (queryFlag) {
+    cerr << "query is " << query << endl;
+    vector< string > queryString = alignment.Tokenize( query.c_str() );
+    PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
+    ppCollection.GetCollection( queryString );
+    if (htmlFlag) {
+      ppCollection.PrintHTML();
+    } else {
+      ppCollection.Print(prettyFlag);
+    }
+  }
+
+  return 0;
+}
diff --git a/mosesdecoder/chk.tmp b/mosesdecoder/chk.tmp
new file mode 100644
index 0000000000000000000000000000000000000000..9daeafb9864cf43055ae93beb0afd6c7d144bfa4
--- /dev/null
+++ b/mosesdecoder/chk.tmp
@@ -0,0 +1 @@
+test
diff --git a/mosesdecoder/doxygen.conf b/mosesdecoder/doxygen.conf
new file mode 100644
index 0000000000000000000000000000000000000000..3cd93e9ed54ea8283c01b0dce52a402dbe59c92f
--- /dev/null
+++ b/mosesdecoder/doxygen.conf
@@ -0,0 +1,1781 @@
+# Doxyfile 1.7.6.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+#       TAG = value [value, ...]
+# For lists, items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME           = "Moses Decoder"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doxy
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding
+# "class=itcl::class" will allow you to use the command class in the
+# itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this
+# tag. The format is ext=language, where ext is a file extension, and language
+# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
+# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
+# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
+# unions with only public data fields will be shown inline in the documentation
+# of the scope in which they are defined (i.e. file, namespace, or group
+# documentation), provided this scope is documented. If set to NO (the default),
+# structs, classes, and unions are shown on a separate page (for HTML and Man
+# pages) or section (for LaTeX and RTF).
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+SYMBOL_CACHE_SIZE      = 0
+
+# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
+# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
+# their name and scope. Since this can be an expensive process and often the
+# same symbol appears multiple times in the code, doxygen keeps a cache of
+# pre-resolved symbols. If the cache is too small doxygen will become slower.
+# If the cache is too large, memory is wasted. The cache size is given by this
+# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = NO
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = moses
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = opt regtest doxy
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = opt/* regtest/* doxy/*
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+#  for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# style sheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the style sheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+#  will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
+# at top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it. Since the tabs have the same information as the
+# navigation tree you can set this option to YES if you already set
+# GENERATE_TREEVIEW to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
+# could consider setting DISABLE_INDEX to YES when enabling this option.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want formulas to look prettier in the HTML
+# output. When enabled you also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the
+# mathjax.org site, so you can quickly see the result without installing
+# MathJax, but it is strongly recommended to install a local copy of MathJax
+# before deployment.
+
+MATHJAX_RELPATH        = http://www.mathjax.org/mathjax
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
+# names that should be enabled during MathJax rendering.
+
+MATHJAX_EXTENSIONS     =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantages are that it is more difficult to setup
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
+# http://en.wikipedia.org/wiki/BibTeX for more info.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load style sheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS        = 0
+
+# By default doxygen will use the Helvetica font for all dot files that
+# doxygen generates. When you want a differently looking font you can specify
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find
+# the font, which can be done by putting it in a standard location or by setting
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the Helvetica font.
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
+# set the path where dot can find it.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used. If you choose svg you need to set
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible in IE 9+ (other browsers do not have this requirement).
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+# Note that this requires a modern browser other than Internet Explorer.
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible. Older versions of IE do not have SVG support.
+
+INTERACTIVE_SVG        = NO
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = YES
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/mosesdecoder/moses-cmd/Jamfile b/mosesdecoder/moses-cmd/Jamfile
new file mode 100644
index 0000000000000000000000000000000000000000..f705732f89bc84178dbec10077a89f25b074254b
--- /dev/null
+++ b/mosesdecoder/moses-cmd/Jamfile
@@ -0,0 +1,7 @@
+# Libraries shared by every executable declared below.
+alias deps :  ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
+
+# One executable per entry-point source file, each linked against deps.
+exe moses : Main.cpp deps ;
+exe vwtrainer : MainVW.cpp deps ;
+exe lmbrgrid : LatticeMBRGrid.cpp deps ;
+# Convenience target that groups the three executables above.
+alias programs : moses lmbrgrid vwtrainer ;
+
diff --git a/mosesdecoder/moses-cmd/LatticeMBRGrid.cpp b/mosesdecoder/moses-cmd/LatticeMBRGrid.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3cb8013426d82f4a387e4d4ecad8b072975a0da7
--- /dev/null
+++ b/mosesdecoder/moses-cmd/LatticeMBRGrid.cpp
@@ -0,0 +1,215 @@
+// $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2010 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+            this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+            this list of conditions and the following disclaimer in the documentation
+            and/or other materials provided with the distribution.
+    * Neither the name of the University of Edinburgh nor the names of its contributors
+            may be used to endorse or promote products derived from this software
+            without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+/**
+* Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
+  See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation' by Tromble, Kumar, Och and Macherey,
+    EMNLP 2008 for details of the parameters.
+
+  The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
+  -lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
+  parameters are missing, then they are set to their default values. Output is of the form:
+   sentence-id ||| p r prune scale ||| translation-hypothesis
+**/
+
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <set>
+
+#include "moses/IOWrapper.h"
+#include "moses/LatticeMBR.h"
+#include "moses/Manager.h"
+#include "moses/Timer.h"
+#include "moses/StaticData.h"
+#include "util/exception.hh"
+
+#include <boost/foreach.hpp>
+#include "moses/TranslationTask.h"
+
+using namespace std;
+using namespace Moses;
+
+// Keys identifying the lattice MBR parameters that take part in the grid search.
+enum gridkey {
+  lmbr_p,
+  lmbr_r,
+  lmbr_prune,
+  lmbr_scale
+};
+
+namespace Moses
+{
+
+/**
+ * Stores, for each lattice MBR parameter, the list of values to try in the
+ * grid search, and maps command line flags onto those parameters.
+ */
+class Grid
+{
+public:
+  /**
+   * Add a parameter with key, command line argument, and default value.
+   * The default is the only grid value until parseArgs() sees the flag.
+   * Throws (via UTIL_THROW_IF2) if the key has already been added.
+   */
+  void addParam(gridkey key, const string& arg, float defaultValue) {
+    // Validate first so that a rejected call leaves both maps untouched.
+    UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(),
+                   "Grid parameter already added for key " << (int) key);
+    m_args[arg] = key;
+    m_grid[key].push_back(defaultValue);
+  }
+
+  /**
+   * Parse the arguments, removing those that define the grid and returning a copy of the rest.
+   *
+   * Each registered flag must be followed by a comma separated list of
+   * non-zero numbers, which replaces the default for that parameter.
+   * On return argc/argv describe the remaining arguments; argv points to a
+   * NULL-terminated array allocated with new[] that this class never frees.
+   *
+   * Throws runtime_error if a flag has no value, is given more than once,
+   * or has a value list that is empty or contains an illegal entry.
+   */
+  void parseArgs(int& argc, char const**& argv) {
+    // Collect the pass-through arguments in a vector first, so that nothing
+    // is leaked if one of the checks below throws.
+    vector<char const*> kept;
+    for (int i = 0; i < argc; ++i) {
+      map<string,gridkey>::const_iterator argi = m_args.find(argv[i]);
+      if (argi == m_args.end()) {
+        kept.push_back(argv[i]);
+        continue;
+      }
+      // A grid flag consumes the following argument as its value list.
+      ++i;
+      if (i >= argc) {
+        cerr << "Error: missing parameter for " << argi->first << endl;
+        throw runtime_error("Missing parameter");
+      }
+      gridkey key = argi->second;
+      // Track the flags seen explicitly: looking at the size of the grid
+      // would miss a repeated flag whose first occurrence had one value.
+      if (!m_overridden.insert(key).second) {
+        throw runtime_error("Duplicate grid argument");
+      }
+      vector<float> values;
+      string const value = argv[i];
+      char const delim = ',';
+      string::size_type start = value.find_first_not_of(delim);
+      while (string::npos != start) {
+        string::size_type const stop = value.find_first_of(delim,start);
+        string const token = value.substr(start, stop-start);
+        char* parseEnd = NULL;
+        float const param = static_cast<float>(strtod(token.c_str(), &parseEnd));
+        // Reject text that is not entirely a number. Zero is rejected too,
+        // as it always has been (it used to be indistinguishable from a
+        // failed atof conversion).
+        if (parseEnd == token.c_str() || *parseEnd != '\0' || !param) {
+          cerr << "Error: Illegal grid parameter for " << argi->first << endl;
+          throw runtime_error("Illegal grid parameter");
+        }
+        values.push_back(param);
+        start = value.find_first_not_of(delim,stop);
+      }
+      // A list without any value would silently disable the whole search.
+      if (values.empty()) {
+        cerr << "Error: Illegal grid parameter for " << argi->first << endl;
+        throw runtime_error("Illegal grid parameter");
+      }
+      m_grid[key].swap(values);
+    }
+    // Keep the original capacity (argc+1); the spare slot holds the NULL
+    // terminator expected at argv[argc].
+    char const** newargv = new char const*[argc+1];
+    for (size_t k = 0; k < kept.size(); ++k) {
+      newargv[k] = kept[k];
+    }
+    newargv[kept.size()] = NULL;
+    argc = static_cast<int>(kept.size());
+    argv = newargv;
+  }
+
+  /**
+   * Get the grid for a particular key.
+   * Throws (via UTIL_THROW_IF2) if the key was never added with addParam().
+   */
+  const vector<float>& getGrid(gridkey key) const {
+    map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
+    // Checked in all builds: dereferencing end() would be undefined behaviour.
+    UTIL_THROW_IF2(iter == m_grid.end(),
+                   "No grid parameter added for key " << (int) key);
+    return iter->second;
+  }
+
+private:
+  // Values to try for each parameter (the default until overridden).
+  map<gridkey,vector<float> > m_grid;
+  // Command line flag -> parameter key.
+  map<string,gridkey> m_args;
+  // Parameters whose flag has already been seen by parseArgs().
+  set<gridkey> m_overridden;
+};
+
+} // namespace
+
+/**
+ * Entry point of the lattice MBR grid search.
+ *
+ * Decodes every input sentence once, then runs lattice MBR for each
+ * combination of the grid parameters and writes one line per combination
+ * to stdout in the form: sentence-id ||| p r prune scale ||| hypothesis.
+ * Exits with status 1 if the moses parameters or static data fail to load.
+ */
+int main(int argc, char const* argv[])
+{
+  cerr << "Lattice MBR Grid search" << endl;
+
+  // Register the grid parameters with their command line flags and defaults.
+  Grid grid;
+  grid.addParam(lmbr_p, "-lmbr-p", 0.5);
+  grid.addParam(lmbr_r, "-lmbr-r", 0.5);
+  grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
+  grid.addParam(lmbr_scale, "-mbr-scale",1.0);
+
+  // Strips the grid flags; argc/argv now hold only the remaining arguments.
+  grid.parseArgs(argc,argv);
+
+  Parameter* params = new Parameter();
+  if (!params->LoadParam(argc,argv)) {
+    params->Explain();
+    exit(1);
+  }
+
+  ResetUserTime();
+  if (!StaticData::LoadDataStatic(params, argv[0])) {
+    exit(1);
+  }
+
+  // Work on a private copy of the options so the grid values can be changed.
+  // NOTE(review): whether decoding picks up later changes to this copy depends
+  // on how IOWrapper and TranslationTask hold the options - confirm.
+  StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
+  boost::shared_ptr<AllOptions> opts(new AllOptions(*SD.options()));
+  LMBR_Options& lmbr = opts->lmbr;
+  MBR_Options&   mbr = opts->mbr;
+  lmbr.enabled = true;
+
+  boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
+  if (!ioWrapper) {
+    throw runtime_error("Failed to initialise IOWrapper");
+  }
+  size_t nBestSize = mbr.size;
+
+  // nBestSize is unsigned, so zero is the only non-positive value.
+  // Throw by value: throwing a pointer would bypass handlers that catch
+  // std::exception and would leak the exception object.
+  if (nBestSize == 0) {
+    throw runtime_error("Non-positive size specified for n-best list");
+  }
+
+  const vector<float>& pgrid = grid.getGrid(lmbr_p);
+  const vector<float>& rgrid = grid.getGrid(lmbr_r);
+  const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
+  const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
+
+  boost::shared_ptr<InputType> source;
+  while((source = ioWrapper->ReadInput()) != NULL) {
+    // set up task of translating one sentence
+    boost::shared_ptr<TranslationTask> ttask;
+    ttask = TranslationTask::create(source, ioWrapper);
+    Manager manager(ttask);
+    // Decode and build the n-best list once; every grid point reuses them.
+    manager.Decode();
+    TrellisPathList nBestList;
+    manager.CalcNBest(nBestSize, nBestList,true);
+    //grid search
+    BOOST_FOREACH(float const& p, pgrid) {
+      lmbr.precision = p;
+      BOOST_FOREACH(float const& r, rgrid) {
+        lmbr.ratio = r;
+        // The pruning factor grid is stored as float; each value is
+        // converted to size_t here.
+        BOOST_FOREACH(size_t const prune_i, prune_grid) {
+          lmbr.pruning_factor = prune_i;
+          BOOST_FOREACH(float const& scale_i, scale_grid) {
+            mbr.scale = scale_i;
+            size_t lineCount = source->GetTranslationId();
+            cout << lineCount << " ||| " << p << " "
+                 << r << " " << size_t(prune_i) << " " << scale_i
+                 << " ||| ";
+            vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
+            manager.OutputBestHypo(mbrBestHypo, cout);
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/mosesdecoder/moses-cmd/Main.cpp b/mosesdecoder/moses-cmd/Main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0318d8f4e6f25882931b12baa2a45f68bdeb99c9
--- /dev/null
+++ b/mosesdecoder/moses-cmd/Main.cpp
@@ -0,0 +1,33 @@
+// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+/**
+ * Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main.
+ **/
+#include "moses/ExportInterface.h"
+#include "util/string_stream.hh"
+
+/**
+ * Entry point of the command line decoder: hands the untouched argument
+ * vector to decoder_main() and returns whatever that call returns.
+ **/
+int main(int argc, char const** argv)
+{
+  const int exitStatus = decoder_main(argc, argv);
+  return exitStatus;
+}
+
diff --git a/mosesdecoder/moses-cmd/MainVW.cpp b/mosesdecoder/moses-cmd/MainVW.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..694dcee8af0629e471f29e441621f4efbfa88044
--- /dev/null
+++ b/mosesdecoder/moses-cmd/MainVW.cpp
@@ -0,0 +1,186 @@
+// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+/**
+ * Moses main, for single-threaded and multi-threaded.
+ **/
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "util/usage.hh"
+
+#ifdef WIN32
+// Include Visual Leak Detector
+//#include <vld.h>
+#endif
+
+#include "moses/IOWrapper.h"
+#include "moses/Hypothesis.h"
+#include "moses/Manager.h"
+#include "moses/StaticData.h"
+#include "moses/TypeDef.h"
+#include "moses/Util.h"
+#include "moses/Timer.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+#include "moses/TrainingTask.h"
+#include "util/random.hh"
+
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+using namespace std;
+using namespace Moses;
+
+namespace Moses
+{
+
+/**
+ * Save all weights held by StaticData to the given stream.
+ * Before writing, the std::ios::fixed flag is set on the stream and its
+ * precision is set to 6; neither setting is restored afterwards.
+ */
+void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
+{
+  std::ostream &out = outputSearchGraphStream;
+  out.setf(std::ios::fixed);
+  out.precision(6);
+
+  const ScoreComponentCollection &allWeights = StaticData::Instance().GetAllWeights();
+  allWeights.Save(out);
+}
+
+
+} //namespace
+
+/**
+ * main function of the command line version of the decoder.
+ *
+ * Loads parameters and StaticData, then reads inputs one at a time from an
+ * IOWrapper and turns each into a TrainingTask. With WITH_THREADS the tasks
+ * are submitted to a ThreadPool, otherwise they are run inline.
+ *
+ * Exit behaviour:
+ *  - exit(1) if parameter loading, StaticData loading or IOWrapper creation fails
+ *  - exit(0) right after ShowWeights() when "show-weights" is specified
+ *  - EXIT_FAILURE is returned if a std::exception reaches the catch block
+ *  - otherwise EXIT_SUCCESS, via exit() unless EXIT_RETURN is defined
+ **/
+int main(int argc, char const** argv)
+{
+  //setting in the Staticdata a link between the thread id of this process and a NULL tasksptr
+  // StaticData::InstanceNonConst().SetTask();  // => moved into StaticData constructor
+
+  try {
+
+#ifdef HAVE_PROTOBUF
+    GOOGLE_PROTOBUF_VERIFY_VERSION;
+#endif
+
+    // echo command line, if verbose
+    IFVERBOSE(1) {
+      TRACE_ERR("command: ");
+      for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+      TRACE_ERR(endl);
+    }
+
+    // set number of significant decimals in output
+    FixPrecision(cout);
+    FixPrecision(cerr);
+
+    // load all the settings into the Parameter class
+    // (stores them as strings, or array of strings)
+    Parameter params;
+    if (!params.LoadParam(argc,argv)) {
+      exit(1);
+    }
+
+
+    // initialize all "global" variables, which are stored in StaticData
+    // note: this also loads models such as the language model, etc.
+    ResetUserTime();
+    if (!StaticData::LoadDataStatic(&params, argv[0])) {
+      exit(1);
+    }
+
+    // setting "-show-weights" -> just dump out weights and exit
+    if (params.isParamSpecified("show-weights")) {
+      ShowWeights();
+      exit(0);
+    }
+
+    // shorthand for accessing information in StaticData
+    const StaticData& staticData = StaticData::Instance();
+
+
+    //initialise random numbers
+    util::rand_init();
+
+    // set up read/writing class
+    // NOTE(review): the timing message below is emitted before the IOWrapper
+    // is actually constructed - confirm whether that ordering is intended
+    IFVERBOSE(1) {
+      PrintUserTime("Created input-output object");
+    }
+    // the IOWrapper is built from a private copy of the global options
+    AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options()));
+    boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
+    // NOTE(review): a plain new-expression throws std::bad_alloc on failure
+    // instead of yielding NULL, so this branch looks unreachable - confirm
+    if (ioWrapper == NULL) {
+      cerr << "Error; Failed to create IO object" << endl;
+      exit(1);
+    }
+
+    // check on weights
+    const ScoreComponentCollection& weights = staticData.GetAllWeights();
+    IFVERBOSE(2) {
+      TRACE_ERR("The global weight vector looks like this: ");
+      TRACE_ERR(weights);
+      TRACE_ERR("\n");
+    }
+
+#ifdef WITH_THREADS
+#pragma message ("Compiling with Threads.")
+    ThreadPool pool(staticData.ThreadCount());
+#endif
+
+    // main loop over set of input sentences
+
+    // a single ContextScope instance is handed to every task created below
+    boost::shared_ptr<ContextScope> scope(new ContextScope);
+    boost::shared_ptr<InputType> source;
+    // the loop ends when ReadInput() yields a null pointer
+    while ((source = ioWrapper->ReadInput()) != NULL) {
+      IFVERBOSE(1) {
+        ResetUserTime();
+      }
+
+      // set up task of training one sentence
+      boost::shared_ptr<TrainingTask> task;
+      task = TrainingTask::create(source, ioWrapper, scope);
+
+      // execute task
+#ifdef WITH_THREADS
+      pool.Submit(task);
+#else
+      task->Run();
+#endif
+    }
+
+    // we are done, finishing up
+#ifdef WITH_THREADS
+    pool.Stop(true); //flush remaining jobs
+#endif
+
+    FeatureFunction::Destroy();
+
+  } catch (const std::exception &e) {
+    // only std::exception and types derived from it are handled here
+    std::cerr << "Exception: " << e.what() << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  IFVERBOSE(1) util::PrintUsage(std::cerr);
+
+#ifndef EXIT_RETURN
+  //This avoids that destructors are called (it can take a long time)
+  exit(EXIT_SUCCESS);
+#else
+  return EXIT_SUCCESS;
+#endif
+}
diff --git a/mosesdecoder/moses-cmd/MainVW.h b/mosesdecoder/moses-cmd/MainVW.h
new file mode 100644
index 0000000000000000000000000000000000000000..49fee0219ec069f2ddb9353995d1ffe0a804389c
--- /dev/null
+++ b/mosesdecoder/moses-cmd/MainVW.h
@@ -0,0 +1,42 @@
+#pragma once
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2006 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+			this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+			this list of conditions and the following disclaimer in the documentation
+			and/or other materials provided with the distribution.
+    * Neither the name of the University of Edinburgh nor the names of its contributors
+			may be used to endorse or promote products derived from this software
+			without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+// example file on how to use moses library
+
+
+#include "moses/StaticData.h"
+
+// Forward declaration only; nothing else in this header refers to IOWrapper.
+class IOWrapper;
+
+// Declaration of the program entry point.
+// NOTE(review): MainVW.cpp defines main as (int, char const**); confirm the
+// different parameter spelling in this declaration is intentional.
+int main(int argc, char* argv[]);
+
diff --git a/mosesdecoder/moses2/AlignmentInfo.h b/mosesdecoder/moses2/AlignmentInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..89b31a1fc44c160baed53909b06ba9b06f21399e
--- /dev/null
+++ b/mosesdecoder/moses2/AlignmentInfo.h
@@ -0,0 +1,148 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include <iostream>
+#include <ostream>
+#include <set>
+#include <vector>
+#include <cstdlib>
+
+#include <boost/functional/hash.hpp>
+#include "TypeDef.h"
+
+namespace Moses2
+{
+
+class AlignmentInfoCollection;
+class System;
+
+/** Collection of non-terminal alignment pairs, ordered by source index.
+  * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models.
+  *
+  * The pairs live in a std::set of (source position, target position).
+  * All constructors are private, so instances can only be created by the
+  * friends declared below.
+ */
+class AlignmentInfo
+{
+  friend struct AlignmentInfoOrderer;
+  friend struct AlignmentInfoHasher;
+  friend class AlignmentInfoCollection;
+  friend class VW;
+
+  friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);
+
+public:
+  typedef std::set<std::pair<size_t,size_t> > CollType;
+  typedef std::vector<size_t> NonTermIndexMap;
+  typedef CollType::const_iterator const_iterator;
+
+  //! read-only iteration over the stored pairs, in the set's order
+  const_iterator begin() const {
+    return m_collection.begin();
+  }
+  const_iterator end() const {
+    return m_collection.end();
+  }
+
+  //! store the pair (sourcePos, targetPos); the index maps are not touched
+  void Add(size_t sourcePos, size_t targetPos) {
+    const CollType::value_type point(sourcePos, targetPos);
+    m_collection.insert(point);
+  }
+
+  /** Provides a map from target-side to source-side non-terminal indices.
+    * The target-side index should be the rule symbol index (COUNTING terminals).
+    * The index returned is the rule non-terminal index (IGNORING terminals).
+   */
+  const NonTermIndexMap &GetNonTermIndexMap() const {
+    return m_nonTermIndexMap;
+  }
+
+  /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
+    * the index counting both terminals and non-terminals) */
+  const NonTermIndexMap &GetNonTermIndexMap2() const {
+    return m_nonTermIndexMap2;
+  }
+
+  //! direct read-only access to the underlying set of pairs
+  const CollType &GetAlignments() const {
+    return m_collection;
+  }
+
+  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
+  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
+
+  //! number of stored pairs
+  size_t GetSize() const {
+    return m_collection.size();
+  }
+
+  std::vector< const std::pair<size_t,size_t>* >
+  GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;
+
+  std::vector<size_t> GetSourceIndex2PosMap() const;
+
+  /** Equal when both the pair set and m_nonTermIndexMap match;
+    * m_nonTermIndexMap2 is not part of the comparison. */
+  bool operator==(const AlignmentInfo& rhs) const {
+    if (!(m_collection == rhs.m_collection)) {
+      return false;
+    }
+    return m_nonTermIndexMap == rhs.m_nonTermIndexMap;
+  }
+
+  std::string Debug(const System &system) const;
+
+private:
+  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
+  explicit AlignmentInfo(const CollType &pairs);
+  explicit AlignmentInfo(const std::vector<unsigned char> &aln);
+
+  // used only by VW to load word alignment between sentences
+  explicit AlignmentInfo(const std::string &str);
+
+  void BuildNonTermIndexMaps();
+
+  CollType m_collection;
+  NonTermIndexMap m_nonTermIndexMap;
+  NonTermIndexMap m_nonTermIndexMap2;
+};
+
+/** Define an arbitrary strict weak ordering between AlignmentInfo objects
+ * for use by AlignmentInfoCollection.
+ * Objects are ordered by their pair sets first; m_nonTermIndexMap breaks
+ * ties between objects whose pair sets are equal.
+ */
+struct AlignmentInfoOrderer {
+  bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
+    if (a.m_collection != b.m_collection) {
+      return a.m_collection < b.m_collection;
+    }
+    return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
+  }
+};
+
+/**
+ * Hashing functoid: folds the pair set and then m_nonTermIndexMap into one
+ * value with boost::hash_combine, starting from 0.
+ **/
+struct AlignmentInfoHasher {
+  size_t operator()(const AlignmentInfo& a) const {
+    size_t combined = 0;
+    boost::hash_combine(combined, a.m_collection);
+    boost::hash_combine(combined, a.m_nonTermIndexMap);
+    return combined;
+  }
+
+};
+
+//! free-function form of AlignmentInfoHasher
+inline size_t hash_value(const AlignmentInfo& a)
+{
+  // the hasher carries no state, so a temporary serves as well as a static
+  return AlignmentInfoHasher()(a);
+}
+
+}
diff --git a/mosesdecoder/moses2/AlignmentInfoCollection.cpp b/mosesdecoder/moses2/AlignmentInfoCollection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6116400c417b6fd51204b03a42b6552e6b401c6
--- /dev/null
+++ b/mosesdecoder/moses2/AlignmentInfoCollection.cpp
@@ -0,0 +1,62 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "AlignmentInfoCollection.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+AlignmentInfoCollection AlignmentInfoCollection::s_instance;
+
+/** Adds an AlignmentInfo built from an empty pair set and remembers the
+ * returned pointer, which GetEmptyAlignmentInfo() later dereferences.
+ */
+AlignmentInfoCollection::AlignmentInfoCollection()
+{
+  AlignmentInfo::CollType noPairs;
+  m_emptyAlignmentInfo = Add(noPairs);
+}
+
+//! empty: no explicit cleanup is performed here
+AlignmentInfoCollection::~AlignmentInfoCollection()
+{
+}
+
+//! the entry that the constructor added for the empty pair set
+const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
+{
+  const AlignmentInfo *emptyInfo = m_emptyAlignmentInfo;
+  return *emptyInfo;
+}
+
+/** Return a pointer to the element of m_collection matching ainfo,
+ * inserting ainfo first when no such element is stored yet.
+ *
+ * With WITH_THREADS the lookup runs under a shared lock on m_accessLock;
+ * only when it misses is a unique lock taken for the insert.
+ */
+AlignmentInfo const *
+AlignmentInfoCollection::
+Add(AlignmentInfo const& ainfo)
+{
+#ifdef WITH_THREADS
+  {
+    // fast path: already stored, the shared lock is sufficient
+    boost::shared_lock<boost::shared_mutex> sharedGuard(m_accessLock);
+    AlignmentInfoSet::const_iterator found = m_collection.find(ainfo);
+    if (found != m_collection.end()) {
+      return &(*found);
+    }
+  } // shared lock is released here, before the unique lock is requested
+  boost::unique_lock<boost::shared_mutex> exclusiveGuard(m_accessLock);
+#endif
+  // insert() hands back an iterator to the matching element whether or not
+  // a new element was added, so a concurrent earlier insert is harmless
+  return &(*m_collection.insert(ainfo).first);
+}
+
+
+
+}
diff --git a/mosesdecoder/moses2/ArcLists.cpp b/mosesdecoder/moses2/ArcLists.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1143024c0bcb9aee43c4ae39bb54710d1229273b
--- /dev/null
+++ b/mosesdecoder/moses2/ArcLists.cpp
@@ -0,0 +1,127 @@
+/*
+ * ArcLists.cpp
+ *
+ *  Created on: 26 Oct 2015
+ *      Author: hieu
+ */
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "ArcLists.h"
+#include "HypothesisBase.h"
+#include "util/exception.hh"
+
+using namespace std;
+
+namespace Moses2
+{
+
+//! no explicit initialisation is performed
+ArcLists::ArcLists()
+{
+}
+
+//! deletes every ArcList that is still referenced from m_coll
+ArcLists::~ArcLists()
+{
+  BOOST_FOREACH(const Coll::value_type &entry, m_coll) {
+    delete entry.second;
+  }
+}
+
+/** Record currHypo in an arc list.
+ *
+ * @param added     true if currHypo won; it then becomes the key of the list
+ * @param currHypo  hypothesis to append to the list
+ * @param otherHypo the competing hypothesis, or NULL if there was none
+ *
+ * If currHypo won, it takes over otherHypo's list (detached from m_coll), or
+ * gets a freshly allocated one when otherHypo is NULL. If it lost, it is
+ * appended to the list keyed by otherHypo, which must not be NULL.
+ */
+void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
+                      const HypothesisBase *otherHypo)
+{
+  ArcList *target = NULL;
+
+  if (!added) {
+    // we lost: there should be a winner, we're not doing beam pruning
+    UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
+    target = &GetArcList(otherHypo);
+  } else {
+    // we won: inherit the loser's list if there is one, else start a new one
+    target = otherHypo ? &GetAndDetachArcList(otherHypo) : new ArcList;
+    m_coll[currHypo] = target;
+  }
+
+  // the current hypo is recorded in either case
+  target->push_back(currHypo);
+}
+
+//! arc list keyed by hypo; throws via UTIL_THROW_IF2 if there is none
+ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
+{
+  Coll::iterator iter = m_coll.find(hypo);
+  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+  return *iter->second;
+}
+
+/** Const lookup of the arc list keyed by hypo.
+ * On a miss, the requested key and all stored keys are printed to cerr
+ * before UTIL_THROW_IF2 throws.
+ */
+const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
+{
+  Coll::const_iterator iter = m_coll.find(hypo);
+
+  if (iter == m_coll.end()) {
+    // diagnostic dump of the keys we do hold
+    cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
+    BOOST_FOREACH(const Coll::value_type &entry, m_coll) {
+      cerr << entry.first << " ";
+    }
+  }
+
+  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
+  return *iter->second;
+}
+
+/** Remove hypo's entry from m_coll and return the list it pointed to.
+ * The list itself is not deleted; throws via UTIL_THROW_IF2 if hypo has
+ * no entry.
+ */
+ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
+{
+  Coll::iterator iter = m_coll.find(hypo);
+  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+
+  // grab the pointer before the entry (and with it the iterator) goes away
+  ArcList *detached = iter->second;
+  m_coll.erase(iter);
+  return *detached;
+}
+
+//! sort every stored arc list with HypothesisFutureScoreOrderer
+void ArcLists::Sort()
+{
+  BOOST_FOREACH(Coll::value_type &entry, m_coll) {
+    ArcList *arcs = entry.second;
+    std::sort(arcs->begin(), arcs->end(), HypothesisFutureScoreOrderer());
+  }
+}
+
+/** Remove hypo's entry from m_coll and delete its arc list.
+ * Throws via UTIL_THROW_IF2 if hypo has no entry.
+ */
+void ArcLists::Delete(const HypothesisBase *hypo)
+{
+  Coll::iterator iter = m_coll.find(hypo);
+  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
+
+  ArcList *doomed = iter->second;
+  m_coll.erase(iter);
+  delete doomed;
+}
+
+/** One "<list address>(<list size>) " token per stored arc list.
+ * The system argument is not used.
+ */
+std::string ArcLists::Debug(const System &system) const
+{
+  stringstream out;
+  for (Coll::const_iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
+    const ArcList *arcs = it->second;
+    out << arcs << "(" << arcs->size() << ") ";
+  }
+  return out.str();
+}
+
+}
+
diff --git a/mosesdecoder/moses2/Array.h b/mosesdecoder/moses2/Array.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d40ce2688fd13d80d721d9ae988b0e0794d7d09
--- /dev/null
+++ b/mosesdecoder/moses2/Array.h
@@ -0,0 +1,85 @@
+#pragma once
+#include <cassert>
+#include <boost/functional/hash.hpp>
+#include "MemPool.h"
+
+namespace Moses2
+{
+
+/** Array of T whose storage is allocated from a MemPool.
+ *
+ * The capacity (m_maxSize) is fixed at construction; resize() only changes
+ * the logical size and never reallocates. The class defines no destructor,
+ * so it does not release the storage itself.
+ */
+template<typename T>
+class Array
+{
+public:
+  typedef T* iterator;
+  typedef const T* const_iterator;
+  //! iterators over the first size() elements
+  const_iterator begin() const {
+    return m_arr;
+  }
+  const_iterator end() const {
+    return m_arr + m_size;
+  }
+
+  iterator begin() {
+    return m_arr;
+  }
+  iterator end() {
+    return m_arr + m_size;
+  }
+
+  /** Allocate room for `size` elements from `pool` and assign `val` to
+   * each of them. Both size and capacity are set to `size`.
+   */
+  Array(MemPool &pool, size_t size = 0, const T &val = T()) {
+    m_size = size;
+    m_maxSize = size;
+    m_arr = pool.Allocate<T>(size);
+    for (size_t i = 0; i < size; ++i) {
+      m_arr[i] = val;
+    }
+  }
+
+  //! current logical number of elements
+  size_t size() const {
+    return m_size;
+  }
+
+  //! element access; the index is only checked by assert()
+  const T& operator[](size_t ind) const {
+    assert(ind < m_size);
+    return m_arr[ind];
+  }
+
+  T& operator[](size_t ind) {
+    assert(ind < m_size);
+    return m_arr[ind];
+  }
+
+  //! raw pointer to the first element
+  T *GetArray() {
+    return m_arr;
+  }
+
+  //! hash_combine over the first size() elements, starting from 0
+  size_t hash() const {
+    size_t seed = 0;
+    for (size_t i = 0; i < m_size; ++i) {
+      boost::hash_combine(seed, m_arr[i]);
+    }
+    return seed;
+  }
+
+  /** Three-way comparison of the raw element bytes.
+   *
+   * For arrays of equal size this is the memcmp() result over all elements.
+   * For arrays of different size only the common prefix is passed to
+   * memcmp(), so neither buffer is read past its end; if the prefixes
+   * match, the shorter array orders first (-1 / 1).
+   */
+  int Compare(const Array &compare) const {
+    const size_t common = (m_size < compare.m_size) ? m_size : compare.m_size;
+    if (common != 0) {
+      const int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * common);
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+    if (m_size == compare.m_size) {
+      return 0;
+    }
+    return (m_size < compare.m_size) ? -1 : 1;
+  }
+
+  //! true when both arrays have the same size and identical element bytes
+  bool operator==(const Array &compare) const {
+    int cmp = Compare(compare);
+    return cmp == 0;
+  }
+
+  /** Change the logical size without reallocating.
+   * newSize must not exceed the capacity fixed at construction; elements
+   * exposed by growing are not assigned here.
+   */
+  void resize(size_t newSize) {
+    // check the requested size, not the current one, against the capacity
+    assert(newSize <= m_maxSize);
+    m_size = newSize;
+  }
+protected:
+  size_t m_size, m_maxSize;
+  T *m_arr;
+};
+
+}
diff --git a/mosesdecoder/moses2/EstimatedScores.h b/mosesdecoder/moses2/EstimatedScores.h
new file mode 100644
index 0000000000000000000000000000000000000000..f854707839b6df1c4e44412b4f456711029eb15a
--- /dev/null
+++ b/mosesdecoder/moses2/EstimatedScores.h
@@ -0,0 +1,59 @@
+// $Id$
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#pragma once
+
+#include <iostream>
+#include "legacy/Util2.h"
+#include "legacy/Bitmap.h"
+#include "legacy/Matrix.h"
+
+namespace Moses2
+{
+class MemPool;
+class System;
+
+//! A square array of floats to store future costs in the phrase-based decoder
+//! (a Matrix<float> constructed with size x size entries)
+class EstimatedScores: public Matrix<float>
+{
+public:
+  EstimatedScores(MemPool &pool, size_t size) :
+    Matrix<float>(pool, size, size) {
+  }
+
+  ~EstimatedScores(); // not implemented
+
+  float CalcEstimatedScore(Bitmap const&) const;
+  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;
+
+  /** Print the matrix to out, one output line per end position with the
+   * values for all start positions separated by spaces.
+   * The system argument is not used.
+   */
+  std::ostream &Debug(std::ostream &out, const System &system) const {
+    const size_t dim = GetSize();
+    for (size_t row = 0; row < dim; ++row) {
+      for (size_t col = 0; col < dim; ++col) {
+        out << GetValue(col, row) << " ";
+      }
+      out << std::endl;
+    }
+    return out;
+  }
+
+};
+
+}
+
diff --git a/mosesdecoder/moses2/HypothesisBase.cpp b/mosesdecoder/moses2/HypothesisBase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c124866d1036d62bdd7bd98c047b94195f1080cd
--- /dev/null
+++ b/mosesdecoder/moses2/HypothesisBase.cpp
@@ -0,0 +1,81 @@
+/*
+ * HypothesisBase.cpp
+ *
+ *  Created on: 24 Oct 2015
+ *      Author: hieu
+ */
+
+#include <boost/foreach.hpp>
+#include <stdlib.h>
+#include <deque>
+#include "HypothesisBase.h"
+#include "System.h"
+#include "Scores.h"
+#include "ManagerBase.h"
+#include "MemPool.h"
+#include "FF/StatefulFeatureFunction.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+//size_t g_numHypos = 0;
+
+/** Set up the per-hypothesis storage, all of it taken from pool:
+ *  - a Scores object, placement-constructed with the system's score count
+ *  - an array of FFState pointers, one slot per stateful feature function,
+ *    each filled with that function's BlankState()
+ * m_mgr is not assigned here.
+ */
+HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
+{
+  Scores *scoresSlot = pool.Allocate<Scores>();
+  m_scores = new (scoresSlot) Scores(system, pool,
+                                     system.featureFunctions.GetNumScores());
+
+  // FF states
+  const std::vector<const StatefulFeatureFunction*> &statefulFFs =
+    system.featureFunctions.GetStatefulFeatureFunctions();
+  const size_t slotCount = statefulFFs.size();
+  m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * slotCount);
+
+  for (size_t i = 0; i < slotCount; ++i) {
+    const StatefulFeatureFunction *ff = statefulFFs[i];
+    // the slot is chosen by the function's own stateful index, not by i
+    const size_t slot = ff->GetStatefulInd();
+    FFState *blank = ff->BlankState(pool, system);
+    m_ffStates[slot] = blank;
+  }
+}
+
+//! hash of all feature-function states, starting from seed 0
+size_t HypothesisBase::hash() const
+{
+  const size_t initialSeed = 0;
+  return hash(initialSeed);
+}
+
+/** Fold the hash of every feature-function state into seed, in slot order,
+ * and return the result. The number of slots is taken from the manager's
+ * system.
+ */
+size_t HypothesisBase::hash(size_t seed) const
+{
+  const size_t stateCount =
+    GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
+
+  size_t idx = 0;
+  while (idx < stateCount) {
+    const FFState *ffState = m_ffStates[idx];
+    const size_t stateHash = ffState->hash();
+    boost::hash_combine(seed, stateHash);
+    ++idx;
+  }
+  return seed;
+}
+
+/** Two hypotheses compare equal when every pair of corresponding
+ * feature-function states compares equal. Scores are not compared.
+ */
+bool HypothesisBase::operator==(const HypothesisBase &other) const
+{
+  const size_t stateCount =
+    GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
+
+  for (size_t idx = 0; idx < stateCount; ++idx) {
+    const FFState *mine = m_ffStates[idx];
+    const FFState *theirs = other.m_ffStates[idx];
+    if (*mine != *theirs) {
+      // first differing state decides
+      return false;
+    }
+  }
+  return true;
+}
+
+}
+
diff --git a/mosesdecoder/moses2/HypothesisBase.h b/mosesdecoder/moses2/HypothesisBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..55747990667b886913227c27cfee933b945e6c54
--- /dev/null
+++ b/mosesdecoder/moses2/HypothesisBase.h
@@ -0,0 +1,76 @@
+/*
+ * HypothesisBase.h
+ *
+ *  Created on: 24 Oct 2015
+ *      Author: hieu
+ */
+#pragma once
+
+#include <iostream>
+#include <cstddef>
+#include "FF/FFState.h"
+#include "Scores.h"
+
+namespace Moses2
+{
+
+class ManagerBase;
+class Scores;
+
+/** Abstract base of decoder hypotheses.
+ * Holds pointers to the manager, to a Scores object and to an array of
+ * feature-function states, and declares the hooks (future score, evaluation,
+ * debug output) that concrete hypothesis types must implement.
+ */
+class HypothesisBase
+{
+public:
+  virtual ~HypothesisBase() {}
+
+  //! the manager pointed to by m_mgr (dereferenced without a null check)
+  ManagerBase &GetManager() const {
+    return *m_mgr;
+  }
+
+  //! view this object as T via static_cast; no runtime type check is made
+  template<typename T>
+  const T &Cast() const {
+    const HypothesisBase &self = *this;
+    return static_cast<const T&>(self);
+  }
+
+  //! scores object of this hypothesis
+  const Scores &GetScores() const {
+    return *m_scores;
+  }
+  Scores &GetScores() {
+    return *m_scores;
+  }
+
+  //! feature-function state stored in slot ind (ind is not range-checked)
+  const FFState *GetState(size_t ind) const {
+    return m_ffStates[ind];
+  }
+  FFState *GetState(size_t ind) {
+    return m_ffStates[ind];
+  }
+
+  //! hashing and equality, declared here and defined out of line
+  virtual size_t hash() const;
+  virtual size_t hash(size_t seed) const;
+  virtual bool operator==(const HypothesisBase &other) const;
+
+  //! to be provided by concrete hypothesis types
+  virtual SCORE GetFutureScore() const = 0;
+  virtual void EvaluateWhenApplied() = 0;
+
+  virtual std::string Debug(const System &system) const = 0;
+
+protected:
+  ManagerBase *m_mgr;
+  Scores *m_scores;
+  FFState **m_ffStates;
+
+  //! protected: only derived classes can construct the base
+  HypothesisBase(MemPool &pool, const System &system);
+};
+
+////////////////////////////////////////////////////////////////////////////////////
+//! comparator that places hypotheses with a higher future score first
+class HypothesisFutureScoreOrderer
+{
+public:
+  bool operator()(const HypothesisBase* a, const HypothesisBase* b) const {
+    return b->GetFutureScore() < a->GetFutureScore();
+  }
+};
+
+}
+
diff --git a/mosesdecoder/moses2/HypothesisColl.cpp b/mosesdecoder/moses2/HypothesisColl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6fd8383e4c352f612fca2f55344a4cea2ee1b09f
--- /dev/null
+++ b/mosesdecoder/moses2/HypothesisColl.cpp
@@ -0,0 +1,286 @@
+/*
+ * HypothesisColl.cpp
+ *
+ *  Created on: 26 Feb 2016
+ *      Author: hieu
+ */
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include <boost/foreach.hpp>
+#include "HypothesisColl.h"
+#include "ManagerBase.h"
+#include "System.h"
+#include "MemPoolAllocator.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+// Creates an empty collection whose set allocates from the manager's pool.
+// The best/worst score trackers start at -inf/+inf, i.e. "nothing seen yet".
+HypothesisColl::HypothesisColl(const ManagerBase &mgr)
+  :m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
+  ,m_sortedHypos(NULL)
+  ,m_bestScore(-std::numeric_limits<float>::infinity())
+  ,m_worstScore(std::numeric_limits<float>::infinity())
+{
+}
+
+// Returns the hypothesis with the highest future score, or NULL if the
+// collection is empty.
+// If the sorted array has already been built, its first element is returned;
+// otherwise the unordered set is scanned linearly.
+const HypothesisBase *HypothesisColl::GetBestHypo() const
+{
+  if (GetSize() == 0) {
+    return NULL;
+  }
+  if (m_sortedHypos) {
+    return (*m_sortedHypos)[0];
+  }
+
+  SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
+  // Must be initialised: if no score compares greater than -inf (all scores
+  // are -inf or NaN) the loop would otherwise never assign it and an
+  // indeterminate pointer would be returned.
+  const HypothesisBase *bestHypo = NULL;
+  BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
+    const SCORE score = hypo->GetFutureScore();
+    // the first element is always taken, so a non-empty collection
+    // never yields NULL
+    if (bestHypo == NULL || score > bestScore) {
+      bestScore = score;
+      bestHypo = hypo;
+    }
+  }
+  return bestHypo;
+}
+
+// Adds hypo to the stack, applying threshold pruning and recombination.
+//
+//  mgr         - supplies the search options (stack size, beam width, n-best size)
+//  hypo        - candidate hypothesis
+//  hypoRecycle - receives every hypothesis that is discarded here
+//  arcLists    - when n-best output is enabled, records the outcome of the
+//                recombination instead of recycling the losing hypothesis
+//
+// A stack size of 0 is treated as "unlimited", in line with SortHypos() and
+// GetSortedAndPrunedHypos(): no pruning and no threshold discard take place.
+void HypothesisColl::Add(
+  const ManagerBase &mgr,
+  HypothesisBase *hypo,
+  Recycler<HypothesisBase*> &hypoRecycle,
+  ArcLists &arcLists)
+{
+  const size_t maxStackSize = mgr.system.options.search.stack_size;
+  const bool limitedStack = (maxStackSize != 0);
+
+  // lazy pruning: only once the stack has grown to more than twice its limit.
+  // Use the arc lists we were given rather than reaching into mgr.
+  if (limitedStack && GetSize() > maxStackSize * 2) {
+    PruneHypos(mgr, arcLists);
+  }
+
+  SCORE futureScore = hypo->GetFutureScore();
+
+  if (limitedStack && GetSize() >= maxStackSize && futureScore < m_worstScore) {
+    // beam threshold or really bad hypo that won't make the pruning cut
+    // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point
+    hypoRecycle.Recycle(hypo);
+    return;
+  }
+
+  // insert, recombining with an equivalent hypothesis if one is stored
+  StackAdd added = Add(hypo);
+
+  size_t nbestSize = mgr.system.options.nbest.nbest_size;
+  if (nbestSize) {
+    // n-best: the losing hypothesis is handed to the arc list, not recycled
+    arcLists.AddArc(added.added, hypo, added.other);
+  } else {
+    if (added.added) {
+      // hypo replaced an equivalent, worse hypothesis (if any)
+      if (added.other) {
+        hypoRecycle.Recycle(added.other);
+      }
+    } else {
+      // an equivalent hypothesis that is at least as good is already stored
+      hypoRecycle.Recycle(hypo);
+    }
+  }
+
+  // update beam variables
+  if (added.added) {
+    if (futureScore > m_bestScore) {
+      m_bestScore = futureScore;
+      float beamWidth = mgr.system.options.search.beam_width;
+      if ( m_bestScore + beamWidth > m_worstScore ) {
+        m_worstScore = m_bestScore + beamWidth;
+      }
+    } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
+      // stack not yet over its limit: track the lowest score seen so far
+      m_worstScore = futureScore;
+    }
+  }
+}
+
+// Inserts hypo into the set, recombining with an equivalent stored hypothesis.
+//
+// Result:
+//   (true,  NULL)     - no equivalent hypothesis existed, hypo is now stored
+//   (true,  existing) - hypo scored higher and replaced the equivalent one
+//   (false, existing) - the stored hypothesis is kept, hypo is not stored
+StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
+{
+  std::pair<_HCType::iterator, bool> inserted = m_coll.insert(hypo);
+
+  if (inserted.second) {
+    // nothing equivalent was in the set
+    return StackAdd(true, NULL);
+  }
+
+  // CHECK RECOMBINATION
+  HypothesisBase *existing = const_cast<HypothesisBase*>(*inserted.first);
+
+  if (!(hypo->GetFutureScore() > existing->GetFutureScore())) {
+    // already storing the best hypo. discard incoming hypo
+    return StackAdd(false, existing);
+  }
+
+  // Incoming hypo is better than the one we have: overwrite the stored
+  // pointer in place. Set elements are const, hence the const_cast; the
+  // insert above failed precisely because the set considers the two
+  // hypotheses equivalent, so the slot stays valid for the new pointer.
+  const_cast<const HypothesisBase *&>(*inserted.first) = hypo;
+
+  return StackAdd(true, existing);
+}
+
+// Returns the hypotheses sorted by descending future score and cut down to
+// the configured stack size (no cut when the stack size is 0).
+// The array is allocated from the manager's pool on the first call and the
+// same array is returned on later calls until Clear() resets it.
+// Hypotheses beyond the limit are recycled (and removed from arcLists when
+// n-best output is enabled); they are not erased from the underlying set.
+const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
+  const ManagerBase &mgr,
+  ArcLists &arcLists) const
+{
+  if (m_sortedHypos != NULL) {
+    return *m_sortedHypos;
+  }
+
+  // create sortedHypos first
+  MemPool &pool = mgr.GetPool();
+  m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
+      m_coll.size());
+  SortHypos(mgr, m_sortedHypos->GetArray());
+
+  // prune
+  Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();
+  const size_t maxStackSize = mgr.system.options.search.stack_size;
+  const size_t numSorted = m_sortedHypos->size();
+
+  if (maxStackSize && numSorted > maxStackSize) {
+    for (size_t pos = maxStackSize; pos < numSorted; ++pos) {
+      HypothesisBase *pruned = const_cast<HypothesisBase*>((*m_sortedHypos)[pos]);
+      recycler.Recycle(pruned);
+
+      // delete from arclist
+      if (mgr.system.options.nbest.nbest_size) {
+        arcLists.Delete(pruned);
+      }
+    }
+    m_sortedHypos->resize(maxStackSize);
+  }
+
+  return *m_sortedHypos;
+}
+
+// Shrinks the collection to the configured stack size, keeping the
+// hypotheses with the highest future scores, and sets m_worstScore to the
+// score of the lowest-ranked survivor.
+// Pruned hypotheses are removed from arcLists (n-best only), erased from the
+// set and handed to the manager's recycler.
+// Does nothing when the stack size is 0 (unlimited) or the collection holds
+// fewer hypotheses than the limit.
+void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
+{
+  const size_t maxStackSize = mgr.system.options.search.stack_size;
+
+  // Snapshot the size: Delete() below shrinks the set, so using GetSize() as
+  // the loop bound would stop after roughly half of the surplus hypotheses.
+  const size_t origSize = GetSize();
+
+  if (maxStackSize == 0 || origSize < maxStackSize) {
+    // sortedHypos[maxStackSize - 1] would be out of range
+    return;
+  }
+
+  Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();
+
+  // scratch array on the stack, released automatically on return
+  const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(origSize * sizeof(const HypothesisBase *));
+  SortHypos(mgr, sortedHypos);
+
+  // update worst score
+  m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore();
+
+  // prune everything ranked below the stack limit
+  for (size_t i = maxStackSize; i < origSize; ++i) {
+    HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]);
+
+    // delete from arclist
+    if (mgr.system.options.nbest.nbest_size) {
+      arcLists.Delete(hypo);
+    }
+
+    // delete from collection
+    Delete(hypo);
+
+    recycler.Recycle(hypo);
+  }
+
+}
+
+// Copies every hypothesis pointer into sortedHypos and orders the array by
+// descending future score.
+// sortedHypos must have room for GetSize() pointers.
+// Only the leading min(stack size, GetSize()) entries are fully sorted; the
+// entries after them all rank below that prefix but are in unspecified order
+// (std::partial_sort). With a stack size of 0 the whole array is sorted.
+void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const
+{
+  const size_t numHypos = GetSize();
+
+  // flatten the set into the caller's array
+  const HypothesisBase **out = sortedHypos;
+  BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
+    *out = hypo;
+    ++out;
+  }
+
+  // how many leading entries have to end up in sorted order
+  const size_t maxStackSize = mgr.system.options.search.stack_size;
+  const size_t numToSort =
+    (maxStackSize != 0 && numHypos > maxStackSize) ? maxStackSize : numHypos;
+
+  std::partial_sort(
+    sortedHypos,
+    sortedHypos + numToSort,
+    sortedHypos + numHypos,
+    HypothesisFutureScoreOrderer());
+}
+
+// Erases hypo from the set.
+// Throws (via UTIL_THROW_IF2) unless exactly one element was erased.
+void HypothesisColl::Delete(const HypothesisBase *hypo)
+{
+  const size_t numErased = m_coll.erase(hypo);
+  UTIL_THROW_IF2(numErased != 1, "couldn't erase hypo " << hypo);
+}
+
+// Returns the collection to its freshly-constructed state: no hypotheses,
+// no cached sorted array, score trackers back at -inf/+inf.
+// The sorted array itself is not freed here, only the pointer is dropped.
+void HypothesisColl::Clear()
+{
+  m_coll.clear();
+  m_sortedHypos = NULL;
+
+  m_worstScore = std::numeric_limits<float>::infinity();
+  m_bestScore = -std::numeric_limits<float>::infinity();
+}
+
+// Concatenates the Debug() output of every stored hypothesis, each followed
+// by a blank line. Iteration follows the set's (unspecified) order.
+std::string HypothesisColl::Debug(const System &system) const
+{
+  stringstream out;
+  BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
+    out << hypo->Debug(system) << std::endl << std::endl;
+  }
+  return out.str();
+}
+
+} /* namespace Moses2 */
diff --git a/mosesdecoder/moses2/HypothesisColl.h b/mosesdecoder/moses2/HypothesisColl.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f1fa4bc755c5d4fac62363b1592c59f2d52b353
--- /dev/null
+++ b/mosesdecoder/moses2/HypothesisColl.h
@@ -0,0 +1,75 @@
+/*
+ * HypothesisColl.h
+ *
+ *  Created on: 26 Feb 2016
+ *      Author: hieu
+ */
+#pragma once
+#include <unordered_set>
+#include "HypothesisBase.h"
+#include "MemPoolAllocator.h"
+#include "Recycler.h"
+#include "Array.h"
+#include "legacy/Util2.h"
+
+namespace Moses2
+{
+
+class ManagerBase;
+class ArcLists;
+
+typedef Array<const HypothesisBase*> Hypotheses;
+
+////////////////////////////////////////////////////
+// A stack of hypotheses: an unordered set (allocated from the manager's
+// MemPool) with recombination, score-threshold tracking and a lazily built
+// sorted-and-pruned array.
+class HypothesisColl
+{
+public:
+  HypothesisColl(const ManagerBase &mgr);
+
+  // Adds hypo, recombining it with an equivalent stored hypothesis and
+  // discarding it when it falls below the current worst-score threshold.
+  // Discarded hypotheses go to hypoRecycle, or are recorded in arcLists when
+  // n-best output is enabled.
+  void Add(const ManagerBase &mgr,
+           HypothesisBase *hypo,
+           Recycler<HypothesisBase*> &hypoRecycle,
+           ArcLists &arcLists);
+
+  // Number of hypotheses currently held in the set.
+  size_t GetSize() const {
+    return m_coll.size();
+  }
+
+  // Empties the set and resets the cached sorted array and score trackers.
+  void Clear();
+
+  // Hypotheses ordered by descending future score, cut to the stack size.
+  // Built on first use and cached in m_sortedHypos (hence const + mutable).
+  const Hypotheses &GetSortedAndPrunedHypos(
+    const ManagerBase &mgr,
+    ArcLists &arcLists) const;
+
+  // Highest-scoring hypothesis, or NULL when the collection is empty.
+  const HypothesisBase *GetBestHypo() const;
+
+  // Same as above, downcast to T with an unchecked static cast
+  // (HypothesisBase::Cast); NULL is passed through.
+  template<typename T>
+  const T *GetBestHypo() const {
+    const HypothesisBase *hypo = GetBestHypo();
+    return hypo ? &hypo->Cast<T>() : NULL;
+  }
+
+  // Erases hypo from the set; throws if nothing was erased.
+  void Delete(const HypothesisBase *hypo);
+
+  std::string Debug(const System &system) const;
+
+protected:
+  // Set keyed on hypothesis pointers; UnorderedComparer<HypothesisBase> serves
+  // as both the hasher and the equality predicate.
+  // NOTE(review): presumably it forwards to HypothesisBase::hash() and
+  // operator== - confirm in legacy/Util2.h.
+  typedef std::unordered_set<const HypothesisBase*,
+		  UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
+          MemPoolAllocator<const HypothesisBase*> > _HCType;
+
+  _HCType m_coll;
+  // lazily created by GetSortedAndPrunedHypos(), NULL until then
+  mutable Hypotheses *m_sortedHypos;
+
+  // highest future score added so far, and the current discard threshold
+  SCORE m_bestScore;
+  SCORE m_worstScore;
+
+  // Raw insert with recombination; reports whether hypo was stored and which
+  // equivalent hypothesis (if any) it competed with.
+  StackAdd Add(const HypothesisBase *hypo);
+
+  // Prunes the set down towards the configured stack size.
+  void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists);
+  // Fills sortedHypos (room for GetSize() pointers) in descending score order.
+  void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const;
+
+};
+
+} /* namespace Moses2 */
+
diff --git a/mosesdecoder/moses2/InputPathsBase.h b/mosesdecoder/moses2/InputPathsBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..88e69ea04ef14800a849a12fefb099a6022ed3c4
--- /dev/null
+++ b/mosesdecoder/moses2/InputPathsBase.h
@@ -0,0 +1,54 @@
+/*
+ * InputPathsBase.h
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+
+#include <vector>
+#include "MemPool.h"
+
+namespace Moses2
+{
+
+class InputType;
+class System;
+class ManagerBase;
+class InputPathBase;
+
+// Abstract container of InputPathBase pointers with STL-style iteration.
+// Subclasses populate m_inputPaths in Init().
+// NOTE(review): ownership of the stored pointers is decided by the
+// destructor, which is defined outside this header - confirm before
+// assuming the container deletes them.
+class InputPathsBase
+{
+  typedef std::vector<InputPathBase*> Coll;
+public:
+  InputPathsBase() {
+  }
+  virtual ~InputPathsBase();
+
+  //! iterators
+  typedef Coll::iterator iterator;
+  typedef Coll::const_iterator const_iterator;
+
+  const_iterator begin() const {
+    return m_inputPaths.begin();
+  }
+  const_iterator end() const {
+    return m_inputPaths.end();
+  }
+
+  iterator begin() {
+    return m_inputPaths.begin();
+  }
+  iterator end() {
+    return m_inputPaths.end();
+  }
+
+  // Builds the input paths for the given input; implemented by subclasses.
+  virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;
+
+protected:
+  Coll m_inputPaths;
+};
+
+}
+
diff --git a/mosesdecoder/moses2/Main.cpp b/mosesdecoder/moses2/Main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b043dd0db0a4d51e12bd13dd2f029dbfdff08ca
--- /dev/null
+++ b/mosesdecoder/moses2/Main.cpp
@@ -0,0 +1,116 @@
+#include <iostream>
+#include <memory>
+#include <boost/pool/pool_alloc.hpp>
+#include "Main.h"
+#include "System.h"
+#include "Phrase.h"
+#include "TranslationTask.h"
+#include "MemPoolAllocator.h"
+#ifdef HAVE_XMLRPC_C
+    #include "server/Server.h"
+#endif // HAVE_XMLRPC_C
+
+#include "legacy/InputFileStream.h"
+#include "legacy/Parameter.h"
+#include "legacy/ThreadPool.h"
+#include "legacy/Timer.h"
+#include "legacy/Util2.h"
+#include "util/usage.hh"
+
+//#include <vld.h>
+
+using namespace std;
+
+//extern size_t g_numHypos;
+
+// Entry point of the moses2 decoder.
+// Loads the parameters and the system, then either serves requests
+// ("server" parameter present) or decodes the input in batch mode.
+// Returns EXIT_FAILURE if the parameters cannot be loaded, EXIT_SUCCESS
+// otherwise (including the early exit for "show-weights").
+int main(int argc, char** argv)
+{
+  cerr << "Starting..." << endl;
+
+  Moses2::Timer timer;
+  timer.start();
+
+  Moses2::Parameter params;
+  const bool paramsLoaded = params.LoadParam(argc, argv);
+  if (!paramsLoaded) {
+    return EXIT_FAILURE;
+  }
+
+  Moses2::System system(params);
+  timer.check("Loaded");
+
+  // "show-weights": loading the system is all that is required
+  if (params.GetParam("show-weights") != NULL) {
+    return EXIT_SUCCESS;
+  }
+
+  // the pool is created in both modes, although only batch_run() is given it
+  Moses2::ThreadPool pool(system.options.server.numThreads,
+                          system.cpuAffinityOffset,
+                          system.cpuAffinityOffsetIncr);
+
+  if (params.GetParam("server") != NULL) {
+    std::cerr << "RUN SERVER" << std::endl;
+    run_as_server(system);
+  } else {
+    std::cerr << "RUN BATCH" << std::endl;
+    batch_run(params, system, pool);
+  }
+
+  cerr << "Decoding took " << timer.get_elapsed_time() << endl;
+  cerr << "Finished" << endl;
+  return EXIT_SUCCESS;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+// Runs the decoder as an XML-RPC server.
+// Only available when compiled with HAVE_XMLRPC_C; otherwise this throws
+// (UTIL_THROW2) to report that server functionality is missing.
+void run_as_server(Moses2::System& system)
+{
+#ifdef HAVE_XMLRPC_C
+	Moses2::Server server(system.options.server, system);
+	server.run(system); // actually: don't return. see Server::run()
+#else
+  UTIL_THROW2("Moses2 was compiled without xmlrpc-c. "
+              << "No server functionality available.");
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+// Returns the stream to read source sentences from: the first "input-file"
+// parameter if one was given, otherwise std::cin.
+// A file stream is heap-allocated here; the caller owns it and must delete
+// it (batch_run() does so after checking that it is not cin).
+istream &GetInputStream(Moses2::Parameter &params)
+{
+  const Moses2::PARAM_VEC *inputFiles = params.GetParam("input-file");
+  if (!inputFiles || !inputFiles->size()) {
+    return cin;
+  }
+
+  return *new Moses2::InputFileStream(inputFiles->at(0));
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Batch mode: reads the input line by line, submits one TranslationTask per
+// line to the thread pool (ids count up from 0 in input order), then stops
+// the pool via Stop(true) and releases the input stream if it was a file.
+void batch_run(Moses2::Parameter& params, Moses2::System& system, Moses2::ThreadPool& pool)
+{
+    istream& inStream = GetInputStream(params);
+
+    string line;
+    for (long translationId = 0; getline(inStream, line); ++translationId) {
+        boost::shared_ptr<Moses2::TranslationTask> task(
+            new Moses2::TranslationTask(system, line, translationId));
+        pool.Submit(task);
+    }
+
+    pool.Stop(true);
+
+    // GetInputStream() hands back either cin or a heap-allocated file stream
+    const bool ownsStream = (&inStream != &cin);
+    if (ownsStream) {
+        delete &inStream;
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/mosesdecoder/moses2/Main.h b/mosesdecoder/moses2/Main.h
new file mode 100644
index 0000000000000000000000000000000000000000..731d6385bc85fd2b3b9778a1c314bbe75ef603ea
--- /dev/null
+++ b/mosesdecoder/moses2/Main.h
@@ -0,0 +1,23 @@
+/*
+ * Main.h
+ *
+ *  Created on: 1 Apr 2016
+ *      Author: hieu
+ */
+#pragma once
+#include <iostream>
+
+namespace Moses2
+{
+class Parameter;
+class System;
+class ThreadPool;
+}
+
+// Stream to read input sentences from: the "input-file" parameter or std::cin.
+std::istream &GetInputStream(Moses2::Parameter &params);
+// Decodes every line of the input using the given thread pool.
+void batch_run(Moses2::Parameter &params, Moses2::System &system, Moses2::ThreadPool &pool);
+// Runs the XML-RPC server (requires a build with HAVE_XMLRPC_C).
+void run_as_server(Moses2::System &system);
+
+// NOTE(review): no definition of Temp() is visible in Main.cpp and its only
+// call there is commented out - presumably a leftover debugging hook.
+void Temp();
+
+
diff --git a/mosesdecoder/moses2/ManagerBase.cpp b/mosesdecoder/moses2/ManagerBase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ed38075decbdb07377791944891175a55f3f07c3
--- /dev/null
+++ b/mosesdecoder/moses2/ManagerBase.cpp
@@ -0,0 +1,53 @@
+/*
+ * ManagerBase.cpp
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include <vector>
+#include <sstream>
+#include "System.h"
+#include "ManagerBase.h"
+#include "Phrase.h"
+#include "InputPathsBase.h"
+#include "InputPathBase.h"
+#include "TranslationModel/PhraseTable.h"
+#include "legacy/Range.h"
+#include "PhraseBased/Sentence.h"
+
+using namespace std;
+
+namespace Moses2
+{
+// Stores the system, task, input string and translation id.
+// The input and all pool/recycler pointers start out NULL; the pools are
+// attached later by InitPools().
+// The initializers are listed in member declaration order, which is the
+// order in which they are executed anyway.
+ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
+                         const std::string &inputStr, long translationId)
+  :system(sys)
+  ,task(task)
+  ,m_inputStr(inputStr)
+  ,m_translationId(translationId)
+  ,m_input(NULL)
+  ,m_pool(NULL)
+  ,m_systemPool(NULL)
+  ,m_hypoRecycler(NULL)
+{
+}
+
+// Per-sentence clean-up: lets the feature functions release their
+// per-sentence data, then resets the manager pool and clears the hypothesis
+// recycler so they can be reused.
+// The constructor leaves m_input, m_pool and m_hypoRecycler NULL until a
+// subclass sets them, so each step is skipped when its pointer was never
+// set; this avoids dereferencing NULL if the manager is destroyed before it
+// was fully initialised.
+ManagerBase::~ManagerBase()
+{
+  if (m_input) {
+    system.featureFunctions.CleanUpAfterSentenceProcessing(*m_input);
+  }
+
+  if (m_pool) {
+    m_pool->Reset();
+  }
+  if (m_hypoRecycler) {
+    m_hypoRecycler->Clear();
+  }
+}
+
+// Attaches this manager to the pools and hypothesis recycler handed out by
+// the System. Until this has been called GetPool(), GetSystemPool() and
+// GetHypoRecycler() dereference NULL pointers.
+// NOTE(review): the System getters presumably return per-thread objects -
+// confirm in System.h.
+void ManagerBase::InitPools()
+{
+  m_pool = &system.GetManagerPool();
+  m_systemPool = &system.GetSystemPool();
+  m_hypoRecycler = &system.GetHypoRecycler();
+  //cerr << "pool size " << m_pool->Size() << " " << m_systemPool->Size() << endl;
+}
+
+}
+
diff --git a/mosesdecoder/moses2/ManagerBase.h b/mosesdecoder/moses2/ManagerBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..b9d5556c7f47f7fad2fd20698d9439b31edbf2d9
--- /dev/null
+++ b/mosesdecoder/moses2/ManagerBase.h
@@ -0,0 +1,81 @@
+/*
+ * ManagerBase.h
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+
+#include <queue>
+#include <cstddef>
+#include <string>
+#include <deque>
+#include "Phrase.h"
+#include "MemPool.h"
+#include "Recycler.h"
+#include "EstimatedScores.h"
+#include "ArcLists.h"
+#include "legacy/Bitmaps.h"
+
+namespace Moses2
+{
+
+class System;
+class TranslationTask;
+class PhraseImpl;
+class SearchNormal;
+class Search;
+class InputType;
+class OutputCollector;
+class HypothesisBase;
+
+// Abstract base of the per-sentence decoding managers.
+// Holds the input string, the translation id and non-owning pointers to the
+// memory pools and hypothesis recycler obtained from the System.
+class ManagerBase
+{
+public:
+  System &system;
+  const TranslationTask &task;
+  // mutable so that code holding a const ManagerBase& can still update it
+  mutable ArcLists arcLists;
+
+  ManagerBase(System &sys, const TranslationTask &task,
+              const std::string &inputStr, long translationId);
+  virtual ~ManagerBase();
+  // Runs the search for the input sentence.
+  virtual void Decode() = 0;
+  // Output strings for the best translation, the n-best list and the
+  // translation-option report respectively.
+  virtual std::string OutputBest() const = 0;
+  virtual std::string OutputNBest() = 0;
+  virtual std::string OutputTransOpt() = 0;
+
+  // The three getters below dereference pointers that are NULL until
+  // InitPools() has been called.
+  MemPool &GetPool() const {
+    return *m_pool;
+  }
+
+  MemPool &GetSystemPool() const {
+    return *m_systemPool;
+  }
+
+  Recycler<HypothesisBase*> &GetHypoRecycler() const {
+    return *m_hypoRecycler;
+  }
+
+  // Parsed input; m_input is NULL after construction and is not assigned
+  // anywhere in ManagerBase itself.
+  const InputType &GetInput() const {
+    return *m_input;
+  }
+
+  long GetTranslationId() const {
+    return m_translationId;
+  }
+
+protected:
+  std::string m_inputStr;
+  long m_translationId;
+  InputType *m_input;
+
+  mutable MemPool *m_pool, *m_systemPool;
+  mutable Recycler<HypothesisBase*> *m_hypoRecycler;
+
+  // Fetches the pools and the recycler from the System.
+  void InitPools();
+
+};
+
+}
+
diff --git a/mosesdecoder/moses2/MemPool.h b/mosesdecoder/moses2/MemPool.h
new file mode 100644
index 0000000000000000000000000000000000000000..d71f7948ddbdab8b4005e801383648faf4fb66f8
--- /dev/null
+++ b/mosesdecoder/moses2/MemPool.h
@@ -0,0 +1,77 @@
+/*
+ * MemPool.h
+ *
+ *  Created on: 28 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits>
+#include <iostream>
+
+namespace Moses2
+{
+
+// Memory pool built from a list of pages. Memory is handed out through
+// Allocate(); there is no per-allocation free, the pool is recycled as a
+// whole with Reset(). Non-copyable.
+class MemPool
+{
+  // One block of pool memory.
+  // NOTE(review): presumably [mem, end) spans `size` bytes - confirm in MemPool.cpp.
+  struct Page {
+    uint8_t *mem;
+    uint8_t *end;
+    size_t size;
+
+    Page() = delete;
+    Page(std::size_t size);
+    ~Page();
+  };
+
+public:
+  MemPool(std::size_t initSize = 10240);
+
+  ~MemPool();
+
+  // Returns `size` bytes of raw pool memory.
+  uint8_t* Allocate(std::size_t size);
+
+  // Raw, unconstructed storage for one T; callers use placement new on it.
+  template<typename T>
+  T *Allocate() {
+    uint8_t *ret = Allocate(sizeof(T));
+    return (T*) ret;
+  }
+
+  // Raw, unconstructed storage for `num` objects of type T.
+  // The per-element size is padded by (sizeof(T) % 16) before multiplying,
+  // so at least num * sizeof(T) bytes are always reserved.
+  // NOTE(review): adding the remainder does not round up to a multiple of 16
+  // (e.g. 4 -> 8, 20 -> 24); if 16-byte rounding was intended the padding
+  // should be (16 - m) % 16. Left unchanged because it affects pool usage.
+  template<typename T>
+  T *Allocate(size_t num) {
+    size_t size = sizeof(T);
+    size_t m = size % 16;
+    size += m;
+
+    uint8_t *ret = Allocate(size * num);
+    return (T*) ret;
+  }
+
+  // re-use pool
+  void Reset();
+
+  size_t Size();
+
+private:
+  // NOTE(review): presumably obtains further memory once the current page is
+  // exhausted - confirm in MemPool.cpp.
+  uint8_t *More(std::size_t size);
+
+  std::vector<Page*> m_pages;
+
+  size_t m_currSize;
+  size_t m_currPage;
+  uint8_t *current_;
+
+  // no copying
+  MemPool(const MemPool &) = delete;
+  MemPool &operator=(const MemPool &) = delete;
+};
+
+
+}
+
diff --git a/mosesdecoder/moses2/MemPoolAllocator.h b/mosesdecoder/moses2/MemPoolAllocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..994bb77112e0c9cb75b09f4160c3d5a285b5e1b6
--- /dev/null
+++ b/mosesdecoder/moses2/MemPoolAllocator.h
@@ -0,0 +1,85 @@
+#pragma once
+#include "MemPool.h"
+
+namespace Moses2
+{
+
+// STL-compatible allocator that takes its memory from a MemPool.
+// Nothing is ever returned to the pool and no destructors are run:
+// deallocate() and destroy() are no-ops, the memory is reclaimed when the
+// pool itself is reset.
+template<typename T>
+class MemPoolAllocator
+{
+public:
+  typedef T value_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+
+  template<class U>
+  struct rebind {
+    typedef MemPoolAllocator<U> other;
+  };
+
+  MemPoolAllocator(Moses2::MemPool &pool) :
+    m_pool(pool) {
+  }
+  MemPoolAllocator(const MemPoolAllocator &other) :
+    m_pool(other.m_pool) {
+  }
+
+  // rebinding copy: the allocator for U shares the same pool
+  template<class U>
+  MemPoolAllocator(const MemPoolAllocator<U>& other) :
+    m_pool(other.m_pool) {
+  }
+
+  size_type max_size() const {
+    return std::numeric_limits<size_type>::max();
+  }
+
+  // no-op: pool memory is never released individually
+  void deallocate(pointer p, size_type n) {
+  }
+
+  // Storage for n objects of type T from the pool; the hint is ignored.
+  // The hint is spelled `const void *`, the same type as the former
+  // std::allocator<void>::const_pointer, which is deprecated in C++17 and
+  // removed in C++20.
+  pointer allocate(size_type n, const void *hint = 0) {
+    pointer ret = m_pool.Allocate<T>(n);
+    return ret;
+  }
+
+  // copy-constructs val into the storage at p
+  void construct(pointer p, const_reference val) {
+    new ((void *) p) T(val);
+  }
+
+  // no-op: the destructor of T is deliberately not called
+  void destroy(pointer p) {
+  }
+
+  // return address of values
+  pointer address (reference value) const {
+    return &value;
+  }
+  const_pointer address (const_reference value) const {
+    return &value;
+  }
+
+  // All instances compare equal, whatever pool they refer to.
+  bool operator==(const MemPoolAllocator<T> &allocator) const {
+    return true;
+  }
+
+  bool operator!=(const MemPoolAllocator<T> &allocator) const {
+    return false;
+  }
+
+  // Assignment keeps the current pool: the reference member cannot be reseated.
+  MemPoolAllocator<T>& operator=(const MemPoolAllocator<T>& allocator) {
+    return *this;
+  }
+
+  // public so that the rebinding constructor of other instantiations can read it
+  MemPool &m_pool;
+protected:
+};
+
+}
+
+
diff --git a/mosesdecoder/moses2/Moses2Wrapper.h b/mosesdecoder/moses2/Moses2Wrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..00bcc07767340ac512877d6646bfef6e4bab8bd7
--- /dev/null
+++ b/mosesdecoder/moses2/Moses2Wrapper.h
@@ -0,0 +1,30 @@
+#pragma once
+#include <string>
+
+namespace Moses2 {
+	class Parameter;
+	class System;
+	extern "C" {
+		// Status codes for the C-facing API; MS_API_OK is 0.
+		enum MosesApiErrorCode {
+			MS_API_OK,
+			MS_API_E_FAILURE,
+			MS_API_E_INPUT,
+			MS_API_E_TIMEOUT
+		};
+	}
+	// Facade over Parameter and System intended for embedding the decoder.
+	// NOTE(review): the class holds raw pointers and declares a destructor but
+	// no copy constructor/assignment; if the destructor deletes them (its
+	// definition is not visible here), copying a wrapper would double-free.
+	class Moses2Wrapper
+	{
+		Parameter* m_param;
+		System* m_system;
+
+	public:
+		// NOTE(review): filePath is presumably the moses.ini configuration file - confirm.
+		Moses2Wrapper(const std::string& filePath);
+		~Moses2Wrapper();
+		// Translates one input; `id` is the translation id and `nbest` selects
+		// n-best output instead of the single best translation.
+		std::string Translate(const std::string& input, long id, bool nbest);
+		void UpdateLMPath(const std::string& filePath);
+
+		// NOTE(review): presumably CopyString() returns a buffer that must be
+		// released with Free() - confirm in the implementation.
+		static char* CopyString(const char* str);
+		static void Free(void* ptr);
+	};
+
+}
\ No newline at end of file
diff --git a/mosesdecoder/moses2/Phrase.h b/mosesdecoder/moses2/Phrase.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a55648d66a1203208123211c770bc88a1f76a0c
--- /dev/null
+++ b/mosesdecoder/moses2/Phrase.h
@@ -0,0 +1,146 @@
+/*
+ * Phrase.h
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include "Word.h"
+#include "MemPool.h"
+#include "TypeDef.h"
+#include "legacy/FactorCollection.h"
+#include "SCFG/Word.h"
+#include <boost/functional/hash.hpp>
+
+namespace Moses2
+{
+
+template<typename WORD>
+class SubPhrase;
+
+class Scores;
+class PhraseTable;
+class MemPool;
+class System;
+
+// Abstract, read-only sequence of WORDs.
+// Subclasses provide element access, the size and sub-phrase creation; the
+// hashing, comparison and output helpers are built on those primitives.
+template<typename WORD>
+class Phrase
+{
+public:
+  virtual ~Phrase() {
+  }
+  virtual const WORD& operator[](size_t pos) const = 0;
+  virtual size_t GetSize() const = 0;
+
+  // Last word of the phrase; the phrase must not be empty.
+  virtual const WORD& Back() const {
+    const size_t size = GetSize();
+    assert(size);
+    return (*this)[size - 1];
+  }
+
+  // Combines the hashes of all words, in order, starting from seed 0.
+  virtual size_t hash() const {
+    size_t seed = 0;
+
+    const size_t size = GetSize();
+    for (size_t pos = 0; pos < size; ++pos) {
+      size_t wordHash = (*this)[pos].hash();
+      boost::hash_combine(seed, wordHash);
+    }
+
+    return seed;
+  }
+
+  // Two phrases are equal when they have the same length and equal words
+  // at every position.
+  virtual bool operator==(const Phrase &compare) const {
+    const size_t size = GetSize();
+    if (size != compare.GetSize()) {
+      return false;
+    }
+
+    for (size_t pos = 0; pos < size; ++pos) {
+      if ((*this)[pos] != compare[pos]) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  virtual bool operator!=(const Phrase &compare) const {
+    return !operator==(compare);
+  }
+
+  // The words rendered with the given factors, separated by single spaces.
+  // An empty phrase gives an empty string.
+  virtual std::string GetString(const FactorList &factorTypes) const {
+    std::stringstream ret;
+
+    const size_t size = GetSize();
+    for (size_t pos = 0; pos < size; ++pos) {
+      if (pos) {
+        ret << " ";
+      }
+      ret << (*this)[pos].GetString(factorTypes);
+    }
+    return ret.str();
+  }
+
+  virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
+
+  // Debug rendering of every word, separated by single spaces.
+  virtual std::string Debug(const System &system) const {
+    std::stringstream out;
+
+    const size_t size = GetSize();
+    for (size_t pos = 0; pos < size; ++pos) {
+      if (pos) {
+        out << " ";
+      }
+      out << (*this)[pos].Debug(system);
+    }
+
+    return out.str();
+  }
+
+  // Writes every word to out, separated by single spaces.
+  virtual void OutputToStream(const System &system, std::ostream &out) const {
+    const size_t size = GetSize();
+    for (size_t pos = 0; pos < size; ++pos) {
+      if (pos) {
+        out << " ";
+      }
+      (*this)[pos].OutputToStream(system, out);
+    }
+  }
+
+
+};
+
+////////////////////////////////////////////////////////////////////////
+// Strict weak ordering of phrases: word by word using Word::Compare, with a
+// phrase that is a proper prefix of another ordered before it.
+template<typename WORD>
+class PhraseOrdererLexical
+{
+public:
+  bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const {
+    const size_t sizeA = a.GetSize();
+    const size_t sizeB = b.GetSize();
+    const size_t numShared = std::min(sizeA, sizeB);
+
+    for (size_t pos = 0; pos < numShared; ++pos) {
+      const Word &wordA = a[pos];
+      const Word &wordB = b[pos];
+      const int cmp = wordA.Compare(wordB);
+      if (cmp != 0) {
+        // first differing word decides
+        return cmp < 0;
+      }
+    }
+
+    // all shared positions are equal: the shorter phrase sorts first
+    return sizeA < sizeB;
+  }
+};
+
+}
+
diff --git a/mosesdecoder/moses2/Recycler.cpp b/mosesdecoder/moses2/Recycler.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7a8fb77dccbb1d525bbd60ef3b849c564533eee
--- /dev/null
+++ b/mosesdecoder/moses2/Recycler.cpp
@@ -0,0 +1,13 @@
+/*
+ * Recycler.cpp
+ *
+ *  Created on: 2 Jan 2016
+ *      Author: hieu
+ */
+
+#include "Recycler.h"
+
+namespace Moses2
+{
+
+} /* namespace Moses2 */
diff --git a/mosesdecoder/moses2/Scores.h b/mosesdecoder/moses2/Scores.h
new file mode 100644
index 0000000000000000000000000000000000000000..5069fda3608039d549a53c7704e6a61bb174c462
--- /dev/null
+++ b/mosesdecoder/moses2/Scores.h
@@ -0,0 +1,81 @@
+/*
+ * Scores.h
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+
+#pragma once
+#include <iostream>
+#include <string>
+#include "TypeDef.h"
+#include "MemPool.h"
+
+namespace Moses2
+{
+
+class FeatureFunction;
+class FeatureFunctions;
+class System;
+
+// Score container: an array of per-feature scores plus a running total.
+// NOTE(review): m_scores is presumably allocated from the MemPool given to
+// the constructor and indexed per feature function - confirm in Scores.cpp.
+class Scores
+{
+public:
+  Scores(const System &system, MemPool &pool, size_t numScores);
+  // NOTE(review): presumably initialises this object from origScores - confirm.
+  Scores(const System &system, MemPool &pool, size_t numScores,
+         const Scores &origScores);
+
+  virtual ~Scores();
+
+  SCORE GetTotalScore() const {
+    return m_total;
+  }
+
+  // Scores belonging to the given feature function.
+  const SCORE *GetScores(const FeatureFunction &featureFunction) const;
+
+  void Reset(const System &system);
+
+  // Parses the scores of featureFunction from str.
+  void CreateFromString(const std::string &str,
+                        const FeatureFunction &featureFunction, const System &system,
+                        bool transformScores);
+
+  // PlusEquals overloads: add a single score, a single score at an offset
+  // within the feature function's scores, a vector/array of scores, or a
+  // whole Scores object.
+  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
+                  const SCORE &score);
+
+  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
+                  const SCORE &score, size_t offset);
+
+  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
+                  const std::vector<SCORE> &scores);
+
+  void PlusEquals(const System &system, const FeatureFunction &featureFunction,
+                  SCORE scores[]);
+
+  void PlusEquals(const System &system, const Scores &scores);
+
+  void MinusEquals(const System &system, const Scores &scores);
+
+  // Assign overloads: set (rather than add to) the scores of featureFunction.
+  void Assign(const System &system, const FeatureFunction &featureFunction,
+              const SCORE &score);
+
+  void Assign(const System &system, const FeatureFunction &featureFunction,
+              const std::vector<SCORE> &scores);
+
+  std::string Debug(const System &system) const;
+
+  void OutputBreakdown(std::ostream &out, const System &system) const;
+
+  // static functions to work out estimated scores
+  static SCORE CalcWeightedScore(const System &system,
+                                 const FeatureFunction &featureFunction, SCORE scores[]);
+
+  static SCORE CalcWeightedScore(const System &system,
+                                 const FeatureFunction &featureFunction, SCORE score);
+
+protected:
+  SCORE *m_scores;
+  SCORE m_total;
+};
+
+}
+
diff --git a/mosesdecoder/moses2/SubPhrase.h b/mosesdecoder/moses2/SubPhrase.h
new file mode 100644
index 0000000000000000000000000000000000000000..21b003912666f5150f9314660595873b83b6d825
--- /dev/null
+++ b/mosesdecoder/moses2/SubPhrase.h
@@ -0,0 +1,54 @@
+#pragma once
+#include <sstream>
+#include "Phrase.h"
+#include "Word.h"
+#include "SCFG/Word.h"
+
+namespace Moses2
+{
+class System;
+
+// Non-owning view of `size` consecutive words of another phrase, starting at
+// `start`. The original phrase must outlive the view; no range checking is
+// done here.
+template<typename WORD>
+class SubPhrase: public Phrase<WORD>
+{
+public:
+  SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
+    :m_origPhrase(&origPhrase)
+    ,m_start(start)
+    ,m_size(size)
+  {}
+
+  // pos is relative to the start of the view
+  virtual const WORD& operator[](size_t pos) const {
+    return (*m_origPhrase)[m_start + pos];
+  }
+
+  virtual size_t GetSize() const {
+    return m_size;
+  }
+
+  // A sub-view refers directly to the original phrase, not to this view.
+  SubPhrase GetSubPhrase(size_t start, size_t size) const {
+    return SubPhrase(*m_origPhrase, m_start + start, size);
+  }
+
+  // Debug rendering of every word in the view, separated by single spaces.
+  virtual std::string Debug(const System &system) const {
+    std::stringstream out;
+
+    const size_t size = GetSize();
+    for (size_t pos = 0; pos < size; ++pos) {
+      if (pos) {
+        out << " ";
+      }
+      out << (*this)[pos].Debug(system);
+    }
+
+    return out.str();
+  }
+
+protected:
+  const Phrase<WORD> *m_origPhrase;
+  size_t m_start, m_size;
+};
+
+
+}
+
diff --git a/mosesdecoder/moses2/TargetPhrase.cpp b/mosesdecoder/moses2/TargetPhrase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..600d41ae75fd19578737d70a01f0b57b9c853b1b
--- /dev/null
+++ b/mosesdecoder/moses2/TargetPhrase.cpp
@@ -0,0 +1,15 @@
+/*
+ * TargetPhrase.cpp
+ *
+ *  Created on: 26 Apr 2016
+ *      Author: hieu
+ */
+
+#include "TargetPhrase.h"
+#include "System.h"
+#include "Scores.h"
+
+namespace Moses2
+{
+
+} /* namespace Moses2 */
diff --git a/mosesdecoder/moses2/TranslationTask.cpp b/mosesdecoder/moses2/TranslationTask.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..07ec0a01131523c2ad716bdd9f8abf4202a4d657
--- /dev/null
+++ b/mosesdecoder/moses2/TranslationTask.cpp
@@ -0,0 +1,65 @@
+#include "TranslationTask.h"
+#include "System.h"
+#include "InputType.h"
+#include "PhraseBased/Manager.h"
+#include "SCFG/Manager.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+// Creates the decoding manager for one input line: the SCFG manager when
+// system.isPb is false, the plain Manager otherwise.
+TranslationTask::TranslationTask(System &system,
+                                 const std::string &line,
+                                 long translationId)
+{
+  if (!system.isPb) {
+    m_mgr = new SCFG::Manager(system, *this, line, translationId);
+    return;
+  }
+
+  m_mgr = new Manager(system, *this, line, translationId);
+}
+
+// Intentionally empty: m_mgr is not released here. It is deleted at the end
+// of Run() and of ReturnTranslation(), so a task that is destroyed without
+// either of them having been called does not free its manager.
+TranslationTask::~TranslationTask()
+{
+}
+
+// Decodes the input and returns the result as a newline-terminated string:
+// the n-best output when nbest is true, the single best output otherwise.
+// The manager is deleted before returning, so the task must not be used for
+// decoding again afterwards.
+std::string TranslationTask::ReturnTranslation(bool nbest) const
+{
+  m_mgr->Decode();
+
+  string out;
+  if (!nbest) {
+    out = m_mgr->OutputBest();
+  } else {
+    out = m_mgr->OutputNBest();
+  }
+  out += "\n";
+
+  delete m_mgr;
+  return out;
+}
+
+// Decodes the input and hands each requested output to its collector, keyed
+// by the translation id, then deletes the manager.
+void TranslationTask::Run()
+{
+  m_mgr->Decode();
+
+  // The 1-best output is always written.
+  string best = m_mgr->OutputBest() + "\n";
+  m_mgr->system.bestCollector->Write(m_mgr->GetTranslationId(), best);
+
+  // N-best list, only when a non-zero n-best size is configured.
+  if (m_mgr->system.options.nbest.nbest_size) {
+    string nbest = m_mgr->OutputNBest();
+    m_mgr->system.nbestCollector->Write(m_mgr->GetTranslationId(), nbest);
+  }
+
+  // Detailed translation report, only when a report file path is configured.
+  if (!m_mgr->system.options.output.detailed_transrep_filepath.empty()) {
+    string transOpt = m_mgr->OutputTransOpt();
+    m_mgr->system.detailedTranslationCollector->Write(m_mgr->GetTranslationId(), transOpt);
+  }
+
+  delete m_mgr;
+}
+
+}
+
diff --git a/mosesdecoder/moses2/TrellisPaths.h b/mosesdecoder/moses2/TrellisPaths.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a6a59c1a64abd681e5c1f11aef7d4a5988b5741
--- /dev/null
+++ b/mosesdecoder/moses2/TrellisPaths.h
@@ -0,0 +1,64 @@
+/*
+ * TrellisPaths.h
+ *
+ *  Created on: 16 Mar 2016
+ *      Author: hieu
+ */
+#pragma once
+
+#include <vector>
+#include <queue>
+#include "PhraseBased/TrellisPath.h"
+
+namespace Moses2
+{
+
+// Ordering functor for pointers to paths: pathA orders before pathB when its
+// future score is lower. Used as the comparator of the std::priority_queue in
+// TrellisPaths, whose top() is therefore the path with the highest score.
+template<typename T>
+struct CompareTrellisPath {
+  bool operator()(const T* pathA, const T* pathB) const {
+    return (pathA->GetFutureScore() < pathB->GetFutureScore());
+  }
+};
+
+// Owning priority queue of heap-allocated paths, ordered by
+// CompareTrellisPath so that the highest-scoring path is retrieved first.
+// Paths still queued when the container is destroyed are deleted; paths
+// handed out by Get() become the caller's responsibility.
+template<typename T>
+class TrellisPaths
+{
+public:
+  TrellisPaths() {}
+
+  // Deletes every path that was never retrieved with Get().
+  virtual ~TrellisPaths() {
+    while (!m_coll.empty()) {
+      T *leftover = m_coll.top();
+      m_coll.pop();
+      delete leftover;
+    }
+  }
+
+  bool empty() const {
+    return m_coll.empty();
+  }
+
+  //! add a new entry into collection (the collection takes ownership)
+  void Add(T *trellisPath) {
+    m_coll.push(trellisPath);
+  }
+
+  // Removes and returns the top path; ownership passes to the caller.
+  // Must not be called on an empty collection.
+  T *Get() {
+    T *best = m_coll.top();
+    m_coll.pop(); // detach before handing the pointer out
+    return best;
+  }
+
+  size_t GetSize() const {
+    return m_coll.size();
+  }
+
+protected:
+  typedef std::priority_queue<T*, std::vector<T*>,
+          CompareTrellisPath<T> > CollectionType;
+  CollectionType m_coll;
+};
+
+} /* namespace Moses2 */
+
diff --git a/mosesdecoder/moses2/TypeDef.h b/mosesdecoder/moses2/TypeDef.h
new file mode 100644
index 0000000000000000000000000000000000000000..d96257ac29e3cca13167ebc61cfae65735d6fa0f
--- /dev/null
+++ b/mosesdecoder/moses2/TypeDef.h
@@ -0,0 +1,125 @@
+/*
+ * TypeDef.h
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include <istream>
+#include "Vector.h"
+
+namespace Moses2
+{
+
+class HypothesisBase;
+
+// Sentinel for "no such index": the largest size_t value.
+// NOTE(review): the macro expands to std::numeric_limits at each use site, but
+// this header does not include <limits> itself - confirm that it always
+// arrives through another include (e.g. Vector.h).
+#define NOT_FOUND 			std::numeric_limits<size_t>::max()
+// Default values for decoder limits and thresholds.
+const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+const size_t DEFAULT_MAX_CHART_SPAN     = 20;
+const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
+const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
+const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
+const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
+
+const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
+const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
+const float LOWEST_SCORE = -100.0f;
+
+const float DEFAULT_BEAM_WIDTH        = 0.00001f;
+const float DEFAULT_EARLY_DISCARDING_THRESHOLD    = 0.0f;
+const float DEFAULT_TRANSLATION_OPTION_THRESHOLD  = 0.0f;
+
+#ifndef BOS_
+#define BOS_ "<s>" //Beginning of sentence symbol
+#endif
+#ifndef EOS_
+#define EOS_ "</s>" //End of sentence symbol
+#endif
+
+typedef size_t FactorType;
+typedef float SCORE;
+typedef std::vector<FactorType> FactorList;
+
+// Identifiers of the available search algorithms. The numeric values are
+// assigned explicitly; value 2 is unused (CubeGrowing is commented out).
+//
+// Note: StaticData uses SearchAlgorithm to determine whether the translation
+// model is phrase-based or syntax-based.  If you add a syntax-based search
+// algorithm here then you should also update StaticData::IsSyntax().
+enum SearchAlgorithm {
+  Normal = 0, CubePruning = 1,
+  //,CubeGrowing = 2
+  CYKPlus = 3,
+  NormalBatch  = 4,
+  ChartIncremental = 5,
+  SyntaxS2T = 6,
+  SyntaxT2S = 7,
+  SyntaxT2S_SCFG = 8,
+  SyntaxF2S = 9,
+  CubePruningPerMiniStack = 10,
+  CubePruningPerBitmap = 11,
+  CubePruningCardinalStack = 12,
+  CubePruningBitmapStack = 13,
+  CubePruningMiniStack = 14,
+  DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
+};
+
+// Identifiers of the supported input formats. The numeric values are assigned
+// explicitly; value 4 is unused (WordLatticeInput2 is commented out).
+enum InputTypeEnum {
+  SentenceInput         = 0,
+  ConfusionNetworkInput = 1,
+  WordLatticeInput      = 2,
+  TreeInputType         = 3,
+  //,WordLatticeInput2 = 4,
+  TabbedSentenceInput    = 5,
+  ForestInputType        = 6,
+  SentenceInputWithCandidates = 7,
+};
+
+// Modes for handling XML markup found in the input (explicit numeric values).
+enum XmlInputType {
+  XmlPassThrough = 0,
+  XmlIgnore      = 1,
+  XmlExclusive   = 2,
+  XmlInclusive   = 3,
+  XmlConstraint  = 4
+};
+
+// Ordering options for word alignment output: unsorted, or by target order.
+enum WordAlignmentSort {
+  NoSort = 0,
+  TargetOrder = 1
+};
+
+// Parsing algorithm choices; the values are implicit
+// (RecursiveCYKPlus == 0, Scope3 == 1).
+enum S2TParsingAlgorithm {
+  RecursiveCYKPlus,
+  Scope3
+};
+
+// Policies for overlapping source labels: add, replace or discard.
+enum SourceLabelOverlap {
+  SourceLabelOverlapAdd = 0,
+  SourceLabelOverlapReplace = 1,
+  SourceLabelOverlapDiscard = 2
+};
+
+/////////////////////////
+// MOSES2 only
+
+// Result of trying to add a hypothesis to a stack: a flag saying whether it
+// was added, plus a pointer to another hypothesis involved in the operation.
+// NOTE(review): the exact meaning of 'other' (e.g. the hypothesis that lost
+// the recombination) is decided by the stack code - confirm against callers.
+class StackAdd
+{
+public:
+  bool added;
+  HypothesisBase *other;
+
+  // Default state is "nothing added, no other hypothesis". The members used
+  // to be left uninitialized, so reading a default-constructed StackAdd was
+  // undefined behaviour.
+  StackAdd() :
+    added(false), other(NULL) {
+  }
+  StackAdd(bool vadded, HypothesisBase *vOther) :
+    added(vadded), other(vOther) {
+  }
+};
+
+class Hypothesis;
+typedef Vector<Hypothesis*> Batch;
+
+class Factor;
+typedef std::vector<const Factor*> Context;
+
+}
+
diff --git a/mosesdecoder/moses2/Vector.cpp b/mosesdecoder/moses2/Vector.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46af0f7934b7c0b749e40811d65680148a44a3e2
--- /dev/null
+++ b/mosesdecoder/moses2/Vector.cpp
@@ -0,0 +1,14 @@
+/*
+ * Vector.cpp
+ *
+ *  Created on: 7 Dec 2015
+ *      Author: hieu
+ */
+
+#include "Vector.h"
+
+namespace Moses2
+{
+
+}
+
diff --git a/mosesdecoder/moses2/Weights.h b/mosesdecoder/moses2/Weights.h
new file mode 100644
index 0000000000000000000000000000000000000000..96fdb5a71a20cc360d1d618a02d5abcdbb4810fa
--- /dev/null
+++ b/mosesdecoder/moses2/Weights.h
@@ -0,0 +1,38 @@
+/*
+ * Weights.h
+ *
+ *  Created on: 24 Oct 2015
+ *      Author: hieu
+ */
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include "TypeDef.h"
+
+namespace Moses2
+{
+
+// Forward declarations. FeatureFunction (singular) is used by
+// Weights::GetWeights below; it was previously not declared in this header,
+// which made the header depend on the include order of its users.
+class FeatureFunction;
+class FeatureFunctions;
+
+// Flat vector of feature weights, addressed by global score index.
+class Weights
+{
+public:
+  Weights();
+  virtual ~Weights();
+
+  // Sets up the weight vector for the given collection of feature functions.
+  void Init(const FeatureFunctions &ffs);
+
+  // Weight stored at index ind. No bounds checking is performed.
+  SCORE operator[](size_t ind) const {
+    return m_weights[ind];
+  }
+
+  // Weights belonging to a single feature function.
+  std::vector<SCORE> GetWeights(const FeatureFunction &ff) const;
+
+  // Overwrites the weights of the feature function called ffName.
+  void SetWeights(const FeatureFunctions &ffs, const std::string &ffName, const std::vector<float> &weights);
+
+protected:
+  std::vector<SCORE> m_weights;
+};
+
+}
+
diff --git a/mosesdecoder/moses2/Word.cpp b/mosesdecoder/moses2/Word.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f272f7cdcc135aa10a42ea11939b94157ddb54ec
--- /dev/null
+++ b/mosesdecoder/moses2/Word.cpp
@@ -0,0 +1,136 @@
+/*
+ * Word.cpp
+ *
+ *  Created on: 23 Oct 2015
+ *      Author: hieu
+ */
+#include <boost/functional/hash_fwd.hpp>
+#include <sstream>
+#include <vector>
+#include "Word.h"
+#include "System.h"
+#include "legacy/Util2.h"
+#include "util/murmur_hash.hh"
+
+using namespace std;
+
+namespace Moses2
+{
+
+// Default constructor: hands m_factors, its length (MAX_NUM_FACTORS) and NULL
+// as the fill value to the Init helper.
+// NOTE(review): Init is declared outside this file and is assumed to assign
+// the value to every slot - confirm.
+Word::Word()
+{
+  Init<const Factor*>(m_factors, MAX_NUM_FACTORS, NULL);
+}
+
+// Copy constructor: copies the factor pointers slot by slot. The Factor
+// objects themselves are shared, not duplicated.
+Word::Word(const Word &copy)
+{
+  for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
+    m_factors[i] = copy.m_factors[i];
+  }
+}
+
+// Nothing to release: this destructor does not delete the Factor objects
+// that m_factors points to.
+Word::~Word()
+{
+  // TODO Auto-generated destructor stub
+}
+
+// Fills this word from a '|'-separated factored token such as "house|NN".
+// Each piece is looked up in (or added to) vocab and stored in the factor slot
+// with the same index; the remaining slots are set to NULL.
+//
+// At most MAX_NUM_FACTORS pieces are stored. Any further pieces are ignored:
+// writing them used to run past the end of m_factors.
+void Word::CreateFromString(FactorCollection &vocab, const System &system,
+                            const std::string &str)
+{
+  const vector<string> toks = Tokenize(str, "|");
+
+  const size_t maxFactors = MAX_NUM_FACTORS;
+  const size_t numFactors = (toks.size() < maxFactors) ? toks.size() : maxFactors;
+
+  for (size_t i = 0; i < numFactors; ++i) {
+    const string &tok = toks[i];
+    m_factors[i] = vocab.AddFactor(tok, system, false);
+  }
+
+  // null the rest
+  for (size_t i = numFactors; i < maxFactors; ++i) {
+    m_factors[i] = NULL;
+  }
+}
+
+// Hash over the raw bytes of the whole factor-pointer array (seed 0), so two
+// words hash alike when all their factor pointers are identical.
+size_t Word::hash() const
+{
+  uint64_t seed = 0;
+  const size_t numBytes = sizeof(Factor*) * MAX_NUM_FACTORS;
+  return util::MurmurHashNative(m_factors, numBytes, seed);
+}
+
+// Hash restricted to the listed factor types: the factor pointers in those
+// slots are combined, in list order, starting from seed 0.
+size_t Word::hash(const std::vector<FactorType> &factors) const
+{
+  size_t seed = 0;
+  for (std::vector<FactorType>::const_iterator it = factors.begin();
+       it != factors.end(); ++it) {
+    const Factor *factor = m_factors[*it];
+    boost::hash_combine(seed, factor);
+  }
+  return seed;
+}
+
+
+// Three-way comparison of the raw factor-pointer arrays (memcmp semantics):
+// negative, zero or positive. The order depends on pointer values, not on
+// the factor strings.
+int Word::Compare(const Word &compare) const
+{
+  const size_t numBytes = sizeof(Factor*) * MAX_NUM_FACTORS;
+  return memcmp(m_factors, compare.m_factors, numBytes);
+}
+
+// Strict weak ordering built on Compare().
+bool Word::operator<(const Word &compare) const
+{
+  return Compare(compare) < 0;
+}
+
+// Debug rendering: every non-NULL factor, in slot order, joined by '|'.
+// The system argument is not used.
+std::string Word::Debug(const System &system) const
+{
+  stringstream out;
+  const char *separator = "";
+  for (size_t i = 0; i < MAX_NUM_FACTORS; ++i) {
+    if (!m_factors[i]) {
+      continue;
+    }
+    out << separator << *m_factors[i];
+    separator = "|"; // every factor after the first is preceded by '|'
+  }
+
+  return out.str();
+}
+
+// Writes the factors selected by system.options.output.factor_order to out,
+// joined by '|'. The factor order must be non-empty and every selected factor
+// must be non-NULL; neither is checked here.
+void Word::OutputToStream(const System &system, std::ostream &out) const
+{
+  const std::vector<FactorType> &order = system.options.output.factor_order;
+
+  std::vector<FactorType>::const_iterator it = order.begin();
+  out << *m_factors[*it];
+
+  for (++it; it != order.end(); ++it) {
+    out << "|" << *m_factors[*it];
+  }
+}
+
+// Returns the strings of the requested factors joined by '|'.
+// factorTypes must be non-empty (asserted) and every requested factor must be
+// non-NULL.
+std::string Word::GetString(const FactorList &factorTypes) const
+{
+  assert(factorTypes.size());
+  std::stringstream ret;
+
+  ret << m_factors[factorTypes[0]]->GetString();
+  for (size_t i = 1; i < factorTypes.size(); ++i) {
+    FactorType factorType = factorTypes[i];
+    // Stream the factor's string. Streaming the Factor* itself, as before,
+    // printed the pointer's address instead of the text.
+    ret << "|" << m_factors[factorType]->GetString();
+  }
+  return ret.str();
+}
+
+}
+
diff --git a/mosesdecoder/moses2/pugixml.cpp b/mosesdecoder/moses2/pugixml.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a39f25880de738604ffe18dd7a6f781bc70afd6b
--- /dev/null
+++ b/mosesdecoder/moses2/pugixml.cpp
@@ -0,0 +1,11456 @@
+/**
+ * pugixml parser - version 1.7
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#ifdef PUGIXML_WCHAR_MODE
+#	include <wchar.h>
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+#	include <math.h>
+#	include <float.h>
+#	ifdef PUGIXML_NO_EXCEPTIONS
+#		include <setjmp.h>
+#	endif
+#endif
+
+#ifndef PUGIXML_NO_STL
+#	include <istream>
+#	include <ostream>
+#	include <string>
+#endif
+
+// For placement new
+#include <new>
+
+#ifdef _MSC_VER
+#	pragma warning(push)
+#	pragma warning(disable: 4127) // conditional expression is constant
+#	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+#	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+#	pragma warning(disable: 4702) // unreachable code
+#	pragma warning(disable: 4996) // this function or variable may be unsafe
+#	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
+#endif
+
+#ifdef __INTEL_COMPILER
+#	pragma warning(disable: 177) // function was declared but never referenced
+#	pragma warning(disable: 279) // controlling expression is constant
+#	pragma warning(disable: 1478 1786) // function was declared "deprecated"
+#	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+#	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+#	pragma option push
+#	pragma warn -8008 // condition is always false
+#	pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+#	pragma diag_suppress=178 // function was declared but never referenced
+#	pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+#	define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+#	define PUGI__NO_INLINE __attribute__((noinline))
+#else
+#	define PUGI__NO_INLINE
+#endif
+
+// Branch weight controls
+#if defined(__GNUC__)
+#	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
+#else
+#	define PUGI__UNLIKELY(cond) (cond)
+#endif
+
+// Simple static assertion
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+#	define PUGI__DMC_VOLATILE volatile
+#else
+#	define PUGI__DMC_VOLATILE
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+using std::memset;
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__)
+#	define PUGI__MSVC_CRT_VERSION _MSC_VER
+#endif
+
+#ifdef PUGIXML_HEADER_ONLY
+#	define PUGI__NS_BEGIN namespace pugi { namespace impl {
+#	define PUGI__NS_END } }
+#	define PUGI__FN inline
+#	define PUGI__FN_NO_INLINE inline
+#else
+#	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+#		define PUGI__NS_BEGIN namespace pugi { namespace impl {
+#		define PUGI__NS_END } }
+#	else
+#		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+#		define PUGI__NS_END } } }
+#	endif
+#	define PUGI__FN
+#	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+#	include <stdint.h>
+#else
+namespace pugi
+{
+#	ifndef _UINTPTR_T_DEFINED
+typedef size_t uintptr_t;
+#	endif
+
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+}
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+// Default allocation hook: forwards to malloc (returns 0 on failure).
+PUGI__FN void* default_allocate(size_t size)
+{
+  void* block = malloc(size);
+  return block;
+}
+
+// Default deallocation hook: forwards to free, matching default_allocate.
+PUGI__FN void default_deallocate(void* ptr)
+{
+  free(ptr);
+}
+
+// Holder for the allocation/deallocation function pointers currently in use.
+// Both start out as the malloc/free based defaults defined above.
+template <typename T>
+struct xml_memory_management_function_storage {
+  static allocation_function allocate;
+  static deallocation_function deallocate;
+};
+
+// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
+// Without a template<> we'll get multiple definitions of the same static
+template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
+template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
+
+// The template parameter only exists for the deduplication trick above; the
+// <int> instantiation is the one the rest of the file goes through.
+typedef xml_memory_management_function_storage<int> xml_memory;
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+// Length of a NUL-terminated char_t string (wide or narrow, depending on
+// PUGIXML_WCHAR_MODE). s must not be null.
+PUGI__FN size_t strlength(const char_t* s)
+{
+  assert(s);
+
+#ifndef PUGIXML_WCHAR_MODE
+  const size_t length = strlen(s);
+#else
+  const size_t length = wcslen(s);
+#endif
+
+  return length;
+}
+
+// True when two NUL-terminated char_t strings have identical contents.
+// Neither argument may be null.
+PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+{
+  assert(src && dst);
+
+#ifndef PUGIXML_WCHAR_MODE
+  const int order = strcmp(src, dst);
+#else
+  const int order = wcscmp(src, dst);
+#endif
+
+  return order == 0;
+}
+
+// True when the NUL-terminated string lhs is exactly the count characters
+// starting at rhs: the first count characters match and lhs ends right after.
+PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+{
+  const char_t* const rhs_end = rhs + count;
+
+  while (rhs != rhs_end) {
+    if (*lhs != *rhs)
+      return false;
+
+    ++lhs;
+    ++rhs;
+  }
+
+  // lhs must terminate exactly where the range ends
+  return *lhs == 0;
+}
+
+// Length of a NUL-terminated wide string. Outside wchar mode the characters
+// are counted by hand so that no wide-character CRT support is required.
+PUGI__FN size_t strlength_wide(const wchar_t* s)
+{
+  assert(s);
+
+#ifdef PUGIXML_WCHAR_MODE
+  return wcslen(s);
+#else
+  size_t length = 0;
+  while (s[length]) ++length;
+  return length;
+#endif
+}
+PUGI__NS_END
+
+// auto_ptr-like object for exception recovery
+PUGI__NS_BEGIN
+// Minimal scope guard: calls deleter(data) on destruction unless the pointer
+// is null or ownership was taken back with release().
+template <typename T, typename D = void(*)(T*)> struct auto_deleter {
+  T* data;
+  D deleter;
+
+  auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) {
+  }
+
+  ~auto_deleter() {
+    if (!data) return;
+
+    deleter(data);
+  }
+
+  // Gives up ownership: returns the pointer and disarms the guard.
+  T* release() {
+    T* owned = data;
+    data = 0;
+    return owned;
+  }
+};
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+// Open-addressing hash table from non-null pointer keys to pointer values.
+// A key of 0 marks an empty slot. The capacity is either 0 or a power of two
+// (rehash() starts at 32 and doubles), which lets "capacity - 1" act as the
+// index mask. The class has no destructor: storage is only released by
+// clear().
+class compact_hash_table
+{
+public:
+  compact_hash_table(): _items(0), _capacity(0), _count(0) {
+  }
+
+  // Releases the slot array (if any) and resets the table to empty.
+  void clear() {
+    if (_items) {
+      xml_memory::deallocate(_items);
+      _items = 0;
+      _capacity = 0;
+      _count = 0;
+    }
+  }
+
+  // Returns the address of the value stored for key, or 0 when the key is
+  // absent (including when the table has no storage yet).
+  void** find(const void* key) {
+    assert(key);
+
+    if (_capacity == 0) return 0;
+
+    size_t hashmod = _capacity - 1;
+    size_t bucket = hash(key) & hashmod;
+
+    for (size_t probe = 0; probe <= hashmod; ++probe) {
+      item_t& probe_item = _items[bucket];
+
+      if (probe_item.key == key)
+        return &probe_item.value;
+
+      // an empty slot ends the probe sequence: the key was never inserted
+      if (probe_item.key == 0)
+        return 0;
+
+      // hash collision, quadratic probing
+      bucket = (bucket + probe + 1) & hashmod;
+    }
+
+    assert(!"Hash table is full");
+    return 0;
+  }
+
+  // Returns the address of the value slot for key, claiming an empty slot
+  // when the key is not present yet. The caller must have made room first
+  // (see reserve()); the asserts check that the table stays below 3/4 full.
+  void** insert(const void* key) {
+    assert(key);
+    assert(_capacity != 0 && _count < _capacity - _capacity / 4);
+
+    size_t hashmod = _capacity - 1;
+    size_t bucket = hash(key) & hashmod;
+
+    for (size_t probe = 0; probe <= hashmod; ++probe) {
+      item_t& probe_item = _items[bucket];
+
+      if (probe_item.key == 0) {
+        probe_item.key = key;
+        _count++;
+        return &probe_item.value;
+      }
+
+      if (probe_item.key == key)
+        return &probe_item.value;
+
+      // hash collision, quadratic probing
+      bucket = (bucket + probe + 1) & hashmod;
+    }
+
+    assert(!"Hash table is full");
+    return 0;
+  }
+
+  // Makes sure at least 16 more keys fit under the 3/4 load limit, growing
+  // the table if needed. Returns false when growing fails.
+  bool reserve() {
+    if (_count + 16 >= _capacity - _capacity / 4)
+      return rehash();
+
+    return true;
+  }
+
+private:
+  struct item_t {
+    const void* key;
+    void* value;
+  };
+
+  item_t* _items;
+  size_t _capacity;
+
+  size_t _count;
+
+  bool rehash();
+
+  // Hashes the pointer value itself; on platforms where pointers are wider
+  // than unsigned int only the low bits take part.
+  static unsigned int hash(const void* key) {
+    unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
+
+    // MurmurHash3 32-bit finalizer
+    h ^= h >> 16;
+    h *= 0x85ebca6bu;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35u;
+    h ^= h >> 16;
+
+    return h;
+  }
+};
+
+// Grows the table to 32 slots (when empty) or to twice its capacity and
+// re-inserts every existing entry. Returns false, leaving the table
+// untouched, when the new slot array cannot be allocated.
+PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
+{
+  // rt is a scratch table that builds the new slot array; the class has no
+  // destructor, so taking over rt._items below does not lead to a double free
+  compact_hash_table rt;
+  rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
+  rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
+
+  if (!rt._items)
+    return false;
+
+  // all-zero slots are empty slots (key == 0)
+  memset(rt._items, 0, sizeof(item_t) * rt._capacity);
+
+  for (size_t i = 0; i < _capacity; ++i)
+    if (_items[i].key)
+      *rt.insert(_items[i].key) = _items[i].value;
+
+  if (_items)
+    xml_memory::deallocate(_items);
+
+  // adopt the new storage; _count stays as is because every entry was
+  // re-inserted exactly once (checked by the assert)
+  _capacity = rt._capacity;
+  _items = rt._items;
+
+  assert(_count == rt._count);
+
+  return true;
+}
+
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+static const size_t xml_memory_page_size =
+#ifdef PUGIXML_MEMORY_PAGE_SIZE
+  PUGIXML_MEMORY_PAGE_SIZE
+#else
+  32768
+#endif
+  ;
+
+#ifdef PUGIXML_COMPACT
+static const uintptr_t xml_memory_block_alignment = 4;
+
+static const uintptr_t xml_memory_page_alignment = sizeof(void*);
+#else
+static const uintptr_t xml_memory_block_alignment = sizeof(void*);
+
+static const uintptr_t xml_memory_page_alignment = 64;
+static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+#endif
+
+// extra metadata bits
+static const uintptr_t xml_memory_page_contents_shared_mask = 32;
+static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+static const uintptr_t xml_memory_page_type_mask = 7;
+
+// combined masks for string uniqueness
+static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
+static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
+
+#ifdef PUGIXML_COMPACT
+#define PUGI__GETPAGE_IMPL(header) (header).get_page()
+#else
+#define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask)
+#endif
+
+#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
+#define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1)
+
+struct xml_allocator;
+
+// Header placed at the start of every memory page handed out by
+// xml_allocator; the page's payload bytes follow directly after this struct.
+// Pages form a doubly linked list through prev/next.
+struct xml_memory_page {
+  // Initializes a page header in caller-provided memory: every field is set
+  // to zero/null. No constructor is run; the memory is simply reinterpreted.
+  static xml_memory_page* construct(void* memory) {
+    xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+    result->allocator = 0;
+    result->prev = 0;
+    result->next = 0;
+    result->busy_size = 0;
+    result->freed_size = 0;
+
+#ifdef PUGIXML_COMPACT
+    result->compact_string_base = 0;
+    result->compact_shared_parent = 0;
+    result->compact_page_marker = 0;
+#endif
+
+    return result;
+  }
+
+  // allocator that owns this page
+  xml_allocator* allocator;
+
+  xml_memory_page* prev;
+  xml_memory_page* next;
+
+  // bytes of the payload handed out so far, and bytes of it given back;
+  // the page is entirely unused again once the two are equal
+  size_t busy_size;
+  size_t freed_size;
+
+#ifdef PUGIXML_COMPACT
+  // per-page state of the compact storage mode
+  char_t* compact_string_base;
+  void* compact_shared_parent;
+  uint32_t* compact_page_marker;
+#endif
+};
+
+// Header stored immediately before every string allocated by
+// xml_allocator::allocate_string. Both fields are stored in units of
+// xml_memory_block_alignment, not bytes (see allocate_string and
+// deallocate_string).
+struct xml_memory_string_header {
+  uint16_t page_offset; // offset from page->data
+  uint16_t full_size; // 0 if string occupies whole page
+};
+
+// Page-based bump allocator. Memory is carved sequentially out of the
+// current page (_root); when it does not fit, allocate_memory_oob() brings
+// in a new page. Individual blocks are never reused: a page only counts how
+// many of its bytes were freed and is released (or reset, for the current
+// page) once all of them are.
+struct xml_allocator {
+  // Starts allocating from root, continuing after whatever root already has
+  // in use.
+  xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) {
+#ifdef PUGIXML_COMPACT
+    _hash = 0;
+#endif
+  }
+
+  // Allocates a page with room for data_size payload bytes, aligned to
+  // xml_memory_page_alignment. Returns 0 when the underlying allocation
+  // fails. The new page is not linked into the page list here.
+  xml_memory_page* allocate_page(size_t data_size) {
+    size_t size = sizeof(xml_memory_page) + data_size;
+
+    // allocate block with some alignment, leaving memory for worst-case padding
+    void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+    if (!memory) return 0;
+
+    // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
+    char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
+
+    // prepare page structure
+    xml_memory_page* page = xml_memory_page::construct(page_memory);
+    assert(page);
+
+    page->allocator = _root->allocator;
+
+    // record the offset for freeing the memory block
+    assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
+    page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory));
+
+    return page;
+  }
+
+  // Releases a page obtained from allocate_page, using the offset byte
+  // stored just before the page to recover the originally allocated pointer.
+  static void deallocate_page(xml_memory_page* page) {
+    char* page_memory = reinterpret_cast<char*>(page);
+
+    xml_memory::deallocate(page_memory - page_memory[-1]);
+  }
+
+  // Slow path of allocate_memory, used when the request does not fit into
+  // the current page.
+  void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+  // Hands out size bytes and reports the page they live in through
+  // out_page. Returns 0 (with out_page set to 0) when a new page was needed
+  // and could not be allocated.
+  void* allocate_memory(size_t size, xml_memory_page*& out_page) {
+    if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
+      return allocate_memory_oob(size, out_page);
+
+    // fast path: bump allocation inside the current page
+    void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
+
+    _busy_size += size;
+
+    out_page = _root;
+
+    return buf;
+  }
+
+#ifdef PUGIXML_COMPACT
+  // Compact mode: objects locate their page through a nearby uint32_t
+  // "marker" holding its own byte offset from the page start. Space for a
+  // marker is requested with every object, but it is only kept when the
+  // previous marker is too far away to be addressed by compact_header.
+  void* allocate_object(size_t size, xml_memory_page*& out_page) {
+    void* result = allocate_memory(size + sizeof(uint32_t), out_page);
+    if (!result) return 0;
+
+    // adjust for marker
+    ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
+
+    if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) {
+      // insert new marker
+      uint32_t* marker = static_cast<uint32_t*>(result);
+
+      *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
+      out_page->compact_page_marker = marker;
+
+      // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
+      // this will make sure deallocate_memory correctly tracks the size
+      out_page->freed_size += sizeof(uint32_t);
+
+      return marker + 1;
+    } else {
+      // roll back uint32_t part
+      _busy_size -= sizeof(uint32_t);
+
+      return result;
+    }
+  }
+#else
+  // Non-compact mode: objects are plain allocations.
+  void* allocate_object(size_t size, xml_memory_page*& out_page) {
+    return allocate_memory(size, out_page);
+  }
+#endif
+
+  // Marks size bytes at ptr inside page as freed. Once every byte of the
+  // page has been freed the page is either reset (when it is the last page
+  // of the list, i.e. the current one) or unlinked and released.
+  void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) {
+    // the current page's busy_size is tracked in _busy_size; sync it first
+    if (page == _root) page->busy_size = _busy_size;
+
+    assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
+    // ptr is only used by the assert above; this keeps it "used" when
+    // asserts are compiled out
+    (void)!ptr;
+
+    page->freed_size += size;
+    assert(page->freed_size <= page->busy_size);
+
+    if (page->freed_size == page->busy_size) {
+      if (page->next == 0) {
+        assert(_root == page);
+
+        // top page freed, just reset sizes
+        page->busy_size = 0;
+        page->freed_size = 0;
+
+#ifdef PUGIXML_COMPACT
+        // reset compact state to maximize efficiency
+        page->compact_string_base = 0;
+        page->compact_shared_parent = 0;
+        page->compact_page_marker = 0;
+#endif
+
+        _busy_size = 0;
+      } else {
+        assert(_root != page);
+        assert(page->prev);
+
+        // remove from the list
+        page->prev->next = page->next;
+        page->next->prev = page->prev;
+
+        // deallocate
+        deallocate_page(page);
+      }
+    }
+  }
+
+  // Allocates storage for length char_t elements preceded by an
+  // xml_memory_string_header, and returns a pointer to the character
+  // storage (0 on allocation failure).
+  char_t* allocate_string(size_t length) {
+    static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
+
+    PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
+
+    // allocate memory for string and header block
+    size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+    // round size up to block alignment boundary
+    size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
+
+    xml_memory_page* page;
+    xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+
+    if (!header) return 0;
+
+    // setup header
+    ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
+
+    assert(page_offset % xml_memory_block_alignment == 0);
+    assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
+    header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
+
+    // full_size == 0 for large strings that occupy the whole page
+    assert(full_size % xml_memory_block_alignment == 0);
+    assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
+    header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
+
+    // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+    // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+    return static_cast<char_t*>(static_cast<void*>(header + 1));
+  }
+
+  // Frees a string returned by allocate_string, using the header in front
+  // of it to find the owning page and the allocation size.
+  void deallocate_string(char_t* string) {
+    // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+    // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+    // get header
+    xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
+    assert(header);
+
+    // deallocate
+    size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
+    xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
+
+    // if full_size == 0 then this string occupies the whole page
+    size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
+
+    deallocate_memory(header, full_size, page);
+  }
+
+  // Compact mode: makes room in the pointer hash table (false on allocation
+  // failure). Otherwise there is nothing to reserve and this returns true.
+  bool reserve() {
+#ifdef PUGIXML_COMPACT
+    return _hash->reserve();
+#else
+    return true;
+#endif
+  }
+
+  // current page (last in the page list) and the bytes used in it so far
+  xml_memory_page* _root;
+  size_t _busy_size;
+
+#ifdef PUGIXML_COMPACT
+  // side table for pointers that do not fit the compact encoding;
+  // set to 0 by the constructor and assigned elsewhere
+  compact_hash_table* _hash;
+#endif
+};
+
+// Slow path of allocate_memory: the request does not fit into the current
+// page, so a new page is allocated. Requests of up to a quarter page get a
+// regular page that becomes the new current page; larger ones get a
+// dedicated page sized exactly for the request. Returns 0 (and sets out_page
+// to 0) when the page cannot be allocated.
+PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+{
+  const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+  xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+  out_page = page;
+
+  if (!page) return 0;
+
+  if (size <= large_allocation_threshold) {
+    // store the usage of the page we are leaving before switching pages
+    _root->busy_size = _busy_size;
+
+    // insert page at the end of linked list
+    page->prev = _root;
+    _root->next = page;
+    _root = page;
+
+    _busy_size = size;
+  } else {
+    // insert page before the end of linked list, so that it is deleted as soon as possible
+    // the last page is not deleted even if it's empty (see deallocate_memory)
+    assert(_root->prev);
+
+    page->prev = _root->prev;
+    page->next = _root;
+
+    _root->prev->next = page;
+    _root->prev = page;
+
+    page->busy_size = size;
+  }
+
+  // the payload starts right after the page header
+  return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
+}
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+static const uintptr_t compact_alignment_log2 = 2;
+static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
+
+// Two-byte object header used in compact mode. One byte stores the distance
+// from this header back to the page marker that was current at construction
+// (in units of compact_alignment); the other stores the flag bits. The
+// owning page is recovered through the marker, see get_page().
+class compact_header
+{
+public:
+  // page: the page this header lives in; its compact_page_marker must lie
+  // less than 256 * compact_alignment bytes before this object (asserted).
+  compact_header(xml_memory_page* page, unsigned int flags) {
+    PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
+
+    ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
+    assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
+
+    _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
+    _flags = static_cast<unsigned char>(flags);
+  }
+
+  // The operators below act on the flag byte only, so only the low 8 bits
+  // of mod are relevant.
+  void operator&=(uintptr_t mod) {
+    _flags &= static_cast<unsigned char>(mod);
+  }
+
+  void operator|=(uintptr_t mod) {
+    _flags |= static_cast<unsigned char>(mod);
+  }
+
+  uintptr_t operator&(uintptr_t mod) const {
+    return _flags & mod;
+  }
+
+  // Walks back to the page marker, then subtracts the offset stored in the
+  // marker to arrive at the start of the page.
+  xml_memory_page* get_page() const {
+    // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+    const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
+    const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
+
+    return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
+  }
+
+private:
+  unsigned char _page;
+  unsigned char _flags;
+};
+
+// Returns the memory page that owns `object`, given that the object's
+// compact_header is located `header_offset` bytes before `object`.
+PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
+{
+  const char* header_address = static_cast<const char*>(object) - header_offset;
+
+  return reinterpret_cast<const compact_header*>(header_address)->get_page();
+}
+
+// Reads the full pointer stored for `object` in the hash table of the allocator
+// that owns the object's page. The result of find() is dereferenced without a
+// check, so the entry is expected to exist.
+template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
+{
+  return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
+}
+
+// Stores `value` for `object` in the hash table of the allocator that owns the
+// object's page. The slot returned by insert() is written without a check.
+template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
+{
+  *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
+}
+
+// One-byte encoding of a T* relative to the address of the field itself.
+//   _data == 0         null pointer
+//   _data in [1, 254]  target is (_data - 1 + start) compact_alignment units away
+//                      from 'this' rounded down to compact_alignment
+//   _data == 255       target is out of range; the full pointer lives in the
+//                      allocator hash table, keyed by 'this'
+// header_offset is the distance in bytes from this field back to the owning
+// object's compact_header; it is used to reach the page for the hash fallback.
+template <typename T, int header_offset, int start = -126> class compact_pointer
+{
+public:
+  compact_pointer(): _data(0) {
+  }
+
+  // Copies by decoding rhs to a raw T* ('+ 0' forces the conversion) and
+  // re-encoding it relative to this field's own address.
+  void operator=(const compact_pointer& rhs) {
+    *this = rhs + 0;
+  }
+
+  // Encodes value, falling back to the hash table when the offset does not fit.
+  void operator=(T* value) {
+    if (value) {
+      // value is guaranteed to be compact-aligned; 'this' is not
+      // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+      // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+      // compensate for arithmetic shift rounding for negative values
+      ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
+      ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
+
+      if (static_cast<uintptr_t>(offset) <= 253)
+        _data = static_cast<unsigned char>(offset + 1);
+      else {
+        compact_set_value<header_offset>(this, value);
+
+        _data = 255;
+      }
+    } else
+      _data = 0;
+  }
+
+  // Decodes the stored value back into a raw pointer (0 when null).
+  operator T*() const {
+    if (_data) {
+      if (_data < 255) {
+        uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
+
+        // the unit count is negative whenever the target precedes 'this'; left-shifting a
+        // negative value is undefined behavior, so scale with a wrapping unsigned multiply
+        ptrdiff_t units = _data - 1 + start;
+
+        return reinterpret_cast<T*>(base + static_cast<uintptr_t>(units) * compact_alignment);
+      } else
+        return compact_get_value<header_offset, T>(this);
+    } else
+      return 0;
+  }
+
+  T* operator->() const {
+    return *this;
+  }
+
+private:
+  unsigned char _data;
+};
+
+// Two-byte encoding of a parent pointer relative to the address of the field itself.
+//   _data == 0             null pointer
+//   _data in [1, 65533]    target is (_data - 1 - 65533) compact_alignment units away
+//                          from 'this' rounded down to compact_alignment (always behind it)
+//   _data == 65534         target equals page->compact_shared_parent
+//   _data == 65535         the full pointer lives in the allocator hash table, keyed by 'this'
+// header_offset is the distance in bytes from this field back to the owning
+// object's compact_header; it is used to reach the page.
+template <typename T, int header_offset> class compact_pointer_parent
+{
+public:
+  compact_pointer_parent(): _data(0) {
+  }
+
+  // Copies by decoding rhs to a raw T* ('+ 0' forces the conversion) and
+  // re-encoding it relative to this field's own address.
+  void operator=(const compact_pointer_parent& rhs) {
+    *this = rhs + 0;
+  }
+
+  // Encodes value: relative offset first, then the per-page shared parent,
+  // then the hash table as the last resort.
+  void operator=(T* value) {
+    if (value) {
+      // value is guaranteed to be compact-aligned; 'this' is not
+      // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+      // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+      // compensate for arithmetic shift behavior for negative values
+      ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
+      ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
+
+      // offset 65533 would be stored as 65534, which the decoder reserves for the
+      // shared parent, so only offsets strictly below it may use the direct encoding
+      if (static_cast<uintptr_t>(offset) < 65533) {
+        _data = static_cast<uint16_t>(offset + 1);
+      } else {
+        xml_memory_page* page = compact_get_page(this, header_offset);
+
+        // the first out-of-range parent seen on a page becomes that page's shared parent
+        if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
+          page->compact_shared_parent = value;
+
+        if (page->compact_shared_parent == value) {
+          _data = 65534;
+        } else {
+          compact_set_value<header_offset>(this, value);
+
+          _data = 65535;
+        }
+      }
+    } else {
+      _data = 0;
+    }
+  }
+
+  // Decodes the stored value back into a raw pointer (0 when null).
+  operator T*() const {
+    if (_data) {
+      if (_data < 65534) {
+        uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
+
+        // the unit count is always negative here; left-shifting a negative value is
+        // undefined behavior, so scale with a wrapping unsigned multiply instead
+        ptrdiff_t units = _data - 1 - 65533;
+
+        return reinterpret_cast<T*>(base + static_cast<uintptr_t>(units) * compact_alignment);
+      } else if (_data == 65534)
+        return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
+      else
+        return compact_get_value<header_offset, T>(this);
+    } else
+      return 0;
+  }
+
+  T* operator->() const {
+    return *this;
+  }
+
+private:
+  uint16_t _data;
+};
+
+// One-byte encoding of a char_t* relative to page->compact_string_base.
+// The encoding is split between a 16-bit word located base_offset bytes before
+// this field (the coarse part, in blocks of 128 characters) and _data (the fine part).
+//   _data == 0         null pointer
+//   _data in [1, 254]  string is at compact_string_base + ((*base - 1) << 7) + (_data - 1)
+//   _data == 255       the full pointer lives in the allocator hash table, keyed by 'this'
+// header_offset is the distance in bytes from this field back to the owning
+// object's compact_header; it is used to reach the page.
+template <int header_offset, int base_offset> class compact_string
+{
+public:
+  compact_string(): _data(0) {
+  }
+
+  // Copies by decoding rhs to a raw char_t* ('+ 0' forces the conversion) and
+  // re-encoding it through the char_t* overload.
+  void operator=(const compact_string& rhs) {
+    *this = rhs + 0;
+  }
+
+  // Encodes value, falling back to the hash table when it cannot be expressed
+  // relative to the page string base and the 16-bit coarse word.
+  void operator=(char_t* value) {
+    if (value) {
+      xml_memory_page* page = compact_get_page(this, header_offset);
+
+      // the first string assigned on a page becomes that page's string base
+      if (PUGI__UNLIKELY(page->compact_string_base == 0))
+        page->compact_string_base = value;
+
+      ptrdiff_t offset = value - page->compact_string_base;
+
+      // the unsigned comparison also rejects negative offsets
+      if (static_cast<uintptr_t>(offset) < (65535 << 7)) {
+        // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+        uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
+
+        if (*base == 0) {
+          // coarse word not set yet: split the offset into a 128-character block index and a remainder
+          *base = static_cast<uint16_t>((offset >> 7) + 1);
+          _data = static_cast<unsigned char>((offset & 127) + 1);
+        } else {
+          // coarse word already set: the string must fall within 254 characters after that block start
+          ptrdiff_t remainder = offset - ((*base - 1) << 7);
+
+          if (static_cast<uintptr_t>(remainder) <= 253) {
+            _data = static_cast<unsigned char>(remainder + 1);
+          } else {
+            compact_set_value<header_offset>(this, value);
+
+            _data = 255;
+          }
+        }
+      } else {
+        compact_set_value<header_offset>(this, value);
+
+        _data = 255;
+      }
+    } else {
+      _data = 0;
+    }
+  }
+
+  // Decodes the stored value back into a raw pointer (0 when null).
+  operator char_t*() const {
+    if (_data) {
+      if (_data < 255) {
+        xml_memory_page* page = compact_get_page(this, header_offset);
+
+        // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+        const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
+        // a directly encoded string always has its coarse word set
+        assert(*base);
+
+        ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
+
+        return page->compact_string_base + offset;
+      } else {
+        return compact_get_value<header_offset, char_t>(this);
+      }
+    } else
+      return 0;
+  }
+
+private:
+  unsigned char _data;
+};
+PUGI__NS_END
+#endif
+
+#ifdef PUGIXML_COMPACT
+namespace pugi
+{
+// Compact-mode attribute record; the static assert pins it to exactly 8 bytes.
+// The integer template arguments of each member are the byte distances the
+// helper classes use to reach `header` (and, for strings, `namevalue_base`)
+// from that member.
+struct xml_attribute_struct {
+  xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) {
+    PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
+  }
+
+  // page locator + flag bits
+  impl::compact_header header;
+
+  // 16-bit word read and written by the name/value compact_string members
+  uint16_t namevalue_base;
+
+  impl::compact_string<4, 2> name;
+  impl::compact_string<5, 3> value;
+
+  impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
+  impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
+};
+
+// Compact-mode node record; the static assert pins it to exactly 12 bytes.
+// The header flags are initialized to (type - 1). The integer template arguments
+// of each member are the byte distances the helper classes use to reach `header`
+// (and, for strings, `namevalue_base`) from that member.
+struct xml_node_struct {
+  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) {
+    PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
+  }
+
+  // page locator + flag bits
+  impl::compact_header header;
+
+  // 16-bit word read and written by the name/value compact_string members
+  uint16_t namevalue_base;
+
+  impl::compact_string<4, 2> name;
+  impl::compact_string<5, 3> value;
+
+  impl::compact_pointer_parent<xml_node_struct, 6> parent;
+
+  impl::compact_pointer<xml_node_struct, 8, 0> first_child;
+
+  impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
+  impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
+
+  impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
+};
+}
+#else
+namespace pugi
+{
+// Attribute record (non-compact layout) with plain pointers.
+struct xml_attribute_struct {
+  // Stores the owning page address in header; all other fields start out null.
+  xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) {
+  }
+
+  // page address; also tested against the *_allocated_mask bits when the attribute is destroyed
+  uintptr_t header;
+
+  char_t*	name;
+  char_t*	value;
+
+  // previous attribute; for the first attribute this refers to the last one (see append_attribute)
+  xml_attribute_struct* prev_attribute_c;
+  // next attribute; null for the last one
+  xml_attribute_struct* next_attribute;
+};
+
+// Node record (non-compact layout) with plain pointers.
+struct xml_node_struct {
+  // Stores the owning page address OR'ed with (type - 1) in header; all other
+  // fields start out null.
+  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) {
+  }
+
+  // page address combined with flag bits; also tested against the *_allocated_mask bits on destroy
+  uintptr_t header;
+
+  char_t* name;
+  char_t* value;
+
+  xml_node_struct* parent;
+
+  xml_node_struct* first_child;
+
+  // previous sibling; for the first child this refers to the last child (see append_node)
+  xml_node_struct* prev_sibling_c;
+  // next sibling; null for the last child
+  xml_node_struct* next_sibling;
+
+  xml_attribute_struct* first_attribute;
+};
+}
+#endif
+
+PUGI__NS_BEGIN
+// Singly linked list entry holding one character buffer pointer.
+struct xml_extra_buffer {
+  char_t* buffer;
+  xml_extra_buffer* next;
+};
+
+// Document root record: a node of type node_document that is also the
+// allocator for the document (it derives from both).
+struct xml_document_struct: public xml_node_struct, public xml_allocator {
+  xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) {
+#ifdef PUGIXML_COMPACT
+    // point the allocator at the hash table embedded in this document
+    _hash = &hash;
+#endif
+  }
+
+  const char_t* buffer;
+
+  // head of the xml_extra_buffer list
+  xml_extra_buffer* extra_buffers;
+
+#ifdef PUGIXML_COMPACT
+  // storage for pointers the compact encodings cannot represent inline
+  compact_hash_table hash;
+#endif
+};
+
+// Returns the allocator recorded on the memory page that owns `object`.
+// `object` must not be null (asserted).
+template <typename Object> inline xml_allocator& get_allocator(const Object* object)
+{
+  assert(object);
+
+  return *PUGI__GETPAGE(object)->allocator;
+}
+
+// Returns the document that owns `object` by downcasting the page's allocator
+// to xml_document_struct (which derives from xml_allocator).
+// `object` must not be null (asserted).
+template <typename Object> inline xml_document_struct& get_document(const Object* object)
+{
+  assert(object);
+
+  return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
+}
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+// Allocates storage for one attribute from `alloc` and constructs it in place.
+// Returns 0 when the allocator cannot provide memory.
+inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+{
+  xml_memory_page* owner_page;
+  void* storage = alloc.allocate_object(sizeof(xml_attribute_struct), owner_page);
+
+  return storage ? new (storage) xml_attribute_struct(owner_page) : 0;
+}
+
+// Allocates storage for one node of the given type from `alloc` and constructs
+// it in place. Returns 0 when the allocator cannot provide memory.
+inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+{
+  xml_memory_page* owner_page;
+  void* storage = alloc.allocate_object(sizeof(xml_node_struct), owner_page);
+
+  return storage ? new (storage) xml_node_struct(owner_page, type) : 0;
+}
+
+// Releases an attribute: frees its name and value strings when the header
+// flags mark them as allocated, then returns the attribute's own memory to its page.
+inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+{
+  if (a->header & impl::xml_memory_page_name_allocated_mask)
+    alloc.deallocate_string(a->name);
+
+  if (a->header & impl::xml_memory_page_value_allocated_mask)
+    alloc.deallocate_string(a->value);
+
+  alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
+}
+
+// Releases a node and everything below it: its own name/value strings (when
+// the header flags mark them as allocated), every attribute, every child
+// subtree (recursively), and finally the node's own memory.
+inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+{
+  if (n->header & impl::xml_memory_page_name_allocated_mask)
+    alloc.deallocate_string(n->name);
+
+  if (n->header & impl::xml_memory_page_value_allocated_mask)
+    alloc.deallocate_string(n->value);
+
+  // fetch each successor before destroying the current element
+  xml_attribute_struct* current_attr = n->first_attribute;
+
+  while (current_attr) {
+    xml_attribute_struct* following_attr = current_attr->next_attribute;
+
+    destroy_attribute(current_attr, alloc);
+
+    current_attr = following_attr;
+  }
+
+  xml_node_struct* current_child = n->first_child;
+
+  while (current_child) {
+    xml_node_struct* following_child = current_child->next_sibling;
+
+    destroy_node(current_child, alloc);
+
+    current_child = following_child;
+  }
+
+  alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
+}
+
+// Links `child` as the last child of `node`. The first child's prev_sibling_c
+// always refers to the last child, so the tail is found without walking the list.
+inline void append_node(xml_node_struct* child, xml_node_struct* node)
+{
+  child->parent = node;
+
+  xml_node_struct* first = node->first_child;
+
+  if (!first) {
+    // only child: it is both head and tail
+    node->first_child = child;
+    child->prev_sibling_c = child;
+    return;
+  }
+
+  xml_node_struct* last = first->prev_sibling_c;
+
+  last->next_sibling = child;
+  child->prev_sibling_c = last;
+  first->prev_sibling_c = child;
+}
+
+// Links `child` as the first child of `node`.
+inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
+{
+  child->parent = node;
+
+  xml_node_struct* head = node->first_child;
+
+  if (head) {
+    // the new head takes over the old head's link to the last child
+    child->prev_sibling_c = head->prev_sibling_c;
+    head->prev_sibling_c = child;
+  } else
+    // only child: its prev link refers to itself
+    child->prev_sibling_c = child;
+
+  child->next_sibling = head;
+  node->first_child = child;
+}
+
+// Links `child` into the sibling list immediately after `node`, under node's parent.
+inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+
+  child->parent = parent;
+
+  if (node->next_sibling)
+    node->next_sibling->prev_sibling_c = child;
+  else
+    // node was the last child, so child becomes the tail referenced by the first child
+    parent->first_child->prev_sibling_c = child;
+
+  child->next_sibling = node->next_sibling;
+  child->prev_sibling_c = node;
+
+  node->next_sibling = child;
+}
+
+// Links `child` into the sibling list immediately before `node`, under node's parent.
+inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+
+  child->parent = parent;
+
+  // node->prev_sibling_c is the last child when node is first; the last child has a null next_sibling
+  if (node->prev_sibling_c->next_sibling)
+    node->prev_sibling_c->next_sibling = child;
+  else
+    parent->first_child = child;
+
+  child->prev_sibling_c = node->prev_sibling_c;
+  child->next_sibling = node;
+
+  node->prev_sibling_c = child;
+}
+
+// Unlinks `node` from its parent's child list and clears its parent and
+// sibling links. The node itself is not destroyed.
+inline void remove_node(xml_node_struct* node)
+{
+  xml_node_struct* parent = node->parent;
+
+  if (node->next_sibling)
+    node->next_sibling->prev_sibling_c = node->prev_sibling_c;
+  else
+    // node was the last child: the first child's prev link must refer to the new tail
+    parent->first_child->prev_sibling_c = node->prev_sibling_c;
+
+  // node->prev_sibling_c is the last child when node is first; the last child has a null next_sibling
+  if (node->prev_sibling_c->next_sibling)
+    node->prev_sibling_c->next_sibling = node->next_sibling;
+  else
+    parent->first_child = node->next_sibling;
+
+  node->parent = 0;
+  node->prev_sibling_c = 0;
+  node->next_sibling = 0;
+}
+
+// Links `attr` as the last attribute of `node`. The first attribute's
+// prev_attribute_c always refers to the last one, so the tail is found directly.
+inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* first = node->first_attribute;
+
+  if (!first) {
+    // only attribute: it is both head and tail
+    node->first_attribute = attr;
+    attr->prev_attribute_c = attr;
+    return;
+  }
+
+  xml_attribute_struct* last = first->prev_attribute_c;
+
+  last->next_attribute = attr;
+  attr->prev_attribute_c = last;
+  first->prev_attribute_c = attr;
+}
+
+// Links `attr` as the first attribute of `node`.
+inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* head = node->first_attribute;
+
+  if (head) {
+    // the new head takes over the old head's link to the last attribute
+    attr->prev_attribute_c = head->prev_attribute_c;
+    head->prev_attribute_c = attr;
+  } else
+    // only attribute: its prev link refers to itself
+    attr->prev_attribute_c = attr;
+
+  attr->next_attribute = head;
+  node->first_attribute = attr;
+}
+
+// Links `attr` into node's attribute list immediately after `place`.
+inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+{
+  if (place->next_attribute)
+    place->next_attribute->prev_attribute_c = attr;
+  else
+    // place was the last attribute, so attr becomes the tail referenced by the first attribute
+    node->first_attribute->prev_attribute_c = attr;
+
+  attr->next_attribute = place->next_attribute;
+  attr->prev_attribute_c = place;
+  place->next_attribute = attr;
+}
+
+// Links `attr` into node's attribute list immediately before `place`.
+inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+{
+  // place->prev_attribute_c is the last attribute when place is first; the last one has a null next_attribute
+  if (place->prev_attribute_c->next_attribute)
+    place->prev_attribute_c->next_attribute = attr;
+  else
+    node->first_attribute = attr;
+
+  attr->prev_attribute_c = place->prev_attribute_c;
+  attr->next_attribute = place;
+  place->prev_attribute_c = attr;
+}
+
+// Unlinks `attr` from node's attribute list and clears its links.
+// The attribute itself is not destroyed.
+inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  if (attr->next_attribute)
+    attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
+  else
+    // attr was the last attribute: the first attribute's prev link must refer to the new tail
+    node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
+
+  // attr->prev_attribute_c is the last attribute when attr is first; the last one has a null next_attribute
+  if (attr->prev_attribute_c->next_attribute)
+    attr->prev_attribute_c->next_attribute = attr->next_attribute;
+  else
+    node->first_attribute = attr->next_attribute;
+
+  attr->prev_attribute_c = 0;
+  attr->next_attribute = 0;
+}
+
+// Allocates a node of the given type and appends it as the last child of `node`.
+// Returns 0 when alloc.reserve() fails or the node cannot be allocated.
+PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+{
+  if (!alloc.reserve()) return 0;
+
+  xml_node_struct* created = allocate_node(alloc, type);
+
+  if (created)
+    append_node(created, node);
+
+  return created;
+}
+
+// Allocates an attribute and appends it as the last attribute of `node`.
+// Returns 0 when alloc.reserve() fails or the attribute cannot be allocated.
+PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
+{
+  if (!alloc.reserve()) return 0;
+
+  xml_attribute_struct* created = allocate_attribute(alloc);
+
+  if (created)
+    append_attribute(created, node);
+
+  return created;
+}
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+// Compile-time "false" tag: used as a template argument whose ::value is 0.
+struct opt_false {
+  enum { value = 0 };
+};
+
+// Compile-time "true" tag: used as a template argument whose ::value is 1.
+struct opt_true {
+  enum { value = 1 };
+};
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+// Returns `value` with its two bytes exchanged.
+inline uint16_t endian_swap(uint16_t value)
+{
+  // bits shifted above bit 15 are discarded by the narrowing cast
+  return static_cast<uint16_t>((value << 8) | (value >> 8));
+}
+
+// Returns `value` with its four bytes in reverse order.
+inline uint32_t endian_swap(uint32_t value)
+{
+  const uint32_t byte0_to_3 = value << 24;
+  const uint32_t byte1_to_2 = (value << 8) & 0x00ff0000;
+  const uint32_t byte2_to_1 = (value >> 8) & 0x0000ff00;
+  const uint32_t byte3_to_0 = value >> 24;
+
+  return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
+}
+
+// Output traits that count how many UTF-8 bytes a sequence of code points needs.
+struct utf8_counter {
+  typedef size_t value_type;
+
+  // Code points below U+10000 take 1 byte (U+0000..U+007F), 2 bytes
+  // (U+0080..U+07FF) or 3 bytes (U+0800..U+FFFF).
+  static value_type low(value_type result, uint32_t ch) {
+    const value_type length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;
+
+    return result + length;
+  }
+
+  // Code points U+10000..U+10FFFF always take 4 bytes.
+  static value_type high(value_type result, uint32_t) {
+    const value_type length = 4;
+
+    return result + length;
+  }
+};
+
+// Output traits that encode code points as UTF-8 into a byte buffer; every
+// function returns the position just past the bytes it wrote.
+struct utf8_writer {
+  typedef uint8_t* value_type;
+
+  // Encodes a code point below U+10000 using 1, 2 or 3 bytes.
+  static value_type low(value_type result, uint32_t ch) {
+    // U+0000..U+007F
+    if (ch < 0x80) {
+      *result++ = static_cast<uint8_t>(ch);
+      return result;
+    }
+
+    // U+0080..U+07FF
+    if (ch < 0x800) {
+      *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
+      *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+      return result;
+    }
+
+    // U+0800..U+FFFF
+    *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
+    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+    *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+    return result;
+  }
+
+  // Encodes a code point in U+10000..U+10FFFF using 4 bytes.
+  static value_type high(value_type result, uint32_t ch) {
+    *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
+    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+    *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+    *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+    return result;
+  }
+
+  // Dispatches to low() or high() depending on the code point's range.
+  static value_type any(value_type result, uint32_t ch) {
+    if (ch < 0x10000)
+      return low(result, ch);
+
+    return high(result, ch);
+  }
+};
+
+// Output traits that count how many UTF-16 code units a sequence of code points needs.
+struct utf16_counter {
+  typedef size_t value_type;
+
+  // Code points below U+10000 take one code unit.
+  static value_type low(value_type result, uint32_t) {
+    const value_type units = 1;
+
+    return result + units;
+  }
+
+  // Code points from U+10000 up take two code units (a surrogate pair).
+  static value_type high(value_type result, uint32_t) {
+    const value_type units = 2;
+
+    return result + units;
+  }
+};
+
+// Output traits that encode code points as UTF-16 code units; every function
+// returns the position just past the units it wrote.
+struct utf16_writer {
+  typedef uint16_t* value_type;
+
+  // Writes a code point below U+10000 as a single code unit.
+  static value_type low(value_type result, uint32_t ch) {
+    *result++ = static_cast<uint16_t>(ch);
+
+    return result;
+  }
+
+  // Writes a code point from U+10000 up as a surrogate pair.
+  static value_type high(value_type result, uint32_t ch) {
+    const uint32_t scalar = static_cast<uint32_t>(ch - 0x10000);
+
+    // high surrogate carries the upper bits, low surrogate the lower 10 bits
+    *result++ = static_cast<uint16_t>(0xD800 + (scalar >> 10));
+    *result++ = static_cast<uint16_t>(0xDC00 + (scalar & 0x3ff));
+
+    return result;
+  }
+
+  // Dispatches to low() or high() depending on the code point's range.
+  static value_type any(value_type result, uint32_t ch) {
+    if (ch < 0x10000)
+      return low(result, ch);
+
+    return high(result, ch);
+  }
+};
+
+// Output traits that count UTF-32 code units: every code point takes exactly one.
+struct utf32_counter {
+  typedef size_t value_type;
+
+  static value_type low(value_type result, uint32_t) {
+    const value_type units = 1;
+
+    return result + units;
+  }
+
+  static value_type high(value_type result, uint32_t) {
+    const value_type units = 1;
+
+    return result + units;
+  }
+};
+
+// Output traits that store code points as UTF-32: each code point is written
+// unchanged as one 32-bit unit, and the position past it is returned.
+struct utf32_writer {
+  typedef uint32_t* value_type;
+
+  static value_type low(value_type result, uint32_t ch) {
+    return any(result, ch);
+  }
+
+  static value_type high(value_type result, uint32_t ch) {
+    return any(result, ch);
+  }
+
+  // The range of the code point does not matter for UTF-32.
+  static value_type any(value_type result, uint32_t ch) {
+    *result++ = ch;
+
+    return result;
+  }
+};
+
+// Output traits that store code points as Latin-1 bytes; anything that does
+// not fit in one byte is replaced with '?'.
+struct latin1_writer {
+  typedef uint8_t* value_type;
+
+  // Writes code points up to 255 unchanged and '?' for the rest.
+  static value_type low(value_type result, uint32_t ch) {
+    if (ch > 255)
+      ch = '?';
+
+    *result++ = static_cast<uint8_t>(ch);
+
+    return result;
+  }
+
+  // Code points from U+10000 up never fit, so '?' is always written.
+  static value_type high(value_type result, uint32_t) {
+    *result++ = '?';
+
+    return result;
+  }
+};
+
+// Decodes UTF-8 bytes into code points and feeds each one to a Traits sink.
+struct utf8_decoder {
+  typedef uint8_t type;
+
+  // Walks `size` bytes starting at `data`. Code points below U+10000 go to
+  // Traits::low, the rest to Traits::high; `result` is threaded through the
+  // calls and the final value is returned. A byte that does not start a
+  // well-formed sequence (bad lead byte, missing or malformed continuation
+  // bytes, truncated input) is skipped on its own.
+  template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) {
+    // payload bits of a continuation byte
+    const uint8_t utf8_byte_mask = 0x3f;
+
+    while (size) {
+      uint8_t lead = *data;
+
+      // 0xxxxxxx -> U+0000..U+007F
+      if (lead < 0x80) {
+        result = Traits::low(result, lead);
+        data += 1;
+        size -= 1;
+
+        // process aligned single-byte (ascii) blocks
+        if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) {
+          // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+          // a zero result from the 0x80808080 mask means none of the 4 bytes has its high bit set
+          while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) {
+            result = Traits::low(result, data[0]);
+            result = Traits::low(result, data[1]);
+            result = Traits::low(result, data[2]);
+            result = Traits::low(result, data[3]);
+            data += 4;
+            size -= 4;
+          }
+        }
+      }
+      // 110xxxxx -> U+0080..U+07FF
+      else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) {
+        result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+        data += 2;
+        size -= 2;
+      }
+      // 1110xxxx -> U+0800-U+FFFF
+      else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) {
+        result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+        data += 3;
+        size -= 3;
+      }
+      // 11110xxx -> U+10000..U+10FFFF
+      else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) {
+        result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+        data += 4;
+        size -= 4;
+      }
+      // 10xxxxxx or 11111xxx -> invalid
+      else {
+        data += 1;
+        size -= 1;
+      }
+    }
+
+    return result;
+  }
+};
+
+// Decodes UTF-16 code units into code points and feeds each one to a Traits
+// sink. When opt_swap::value is non-zero every unit is byte-swapped first.
+template <typename opt_swap> struct utf16_decoder {
+  typedef uint16_t type;
+
+  // Walks `size` code units starting at `data`. Non-surrogate units go to
+  // Traits::low, valid surrogate pairs go to Traits::high as one code point,
+  // and unpaired surrogates are skipped one unit at a time. `result` is
+  // threaded through the calls and the final value is returned.
+  template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) {
+    while (size) {
+      const uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+      // number of code units consumed by this iteration
+      size_t consumed = 1;
+
+      // U+0000..U+D7FF or U+E000..U+FFFF
+      if (lead < 0xD800 || static_cast<unsigned int>(lead - 0xE000) < 0x2000) {
+        result = Traits::low(result, lead);
+      }
+      // surrogate pair lead with at least one more unit available
+      else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) {
+        const uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+        if (static_cast<unsigned int>(next - 0xDC00) < 0x400) {
+          result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+          consumed = 2;
+        }
+      }
+
+      data += consumed;
+      size -= consumed;
+    }
+
+    return result;
+  }
+};
+
+// Reads UTF-32 units as code points and feeds each one to a Traits sink.
+// When opt_swap::value is non-zero every unit is byte-swapped first.
+template <typename opt_swap> struct utf32_decoder {
+  typedef uint32_t type;
+
+  // Walks `size` units starting at `data`: values below 0x10000 go to
+  // Traits::low, all others to Traits::high. `result` is threaded through the
+  // calls and the final value is returned.
+  template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) {
+    for (; size; ++data, --size) {
+      const uint32_t code = opt_swap::value ? endian_swap(*data) : *data;
+
+      if (code < 0x10000)
+        result = Traits::low(result, code);
+      else
+        result = Traits::high(result, code);
+    }
+
+    return result;
+  }
+};
+
+// Reads Latin-1 bytes as code points and feeds each one to a Traits sink.
+struct latin1_decoder {
+  typedef uint8_t type;
+
+  // Every byte is passed to Traits::low as-is; `result` is threaded through
+  // the calls and the final value is returned.
+  template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) {
+    for (const uint8_t* end = data + size; data != end; ++data)
+      result = Traits::low(result, *data);
+
+    return result;
+  }
+};
+
+// Maps sizeof(wchar_t) to the matching UTF unit type, counter, writer and
+// decoder. Only sizes 2 (UTF-16) and 4 (UTF-32) are specialized; any other
+// size leaves the template undefined.
+template <size_t size> struct wchar_selector;
+
+// 2-byte wchar_t: UTF-16, decoded without byte swapping
+template <> struct wchar_selector<2> {
+  typedef uint16_t type;
+  typedef utf16_counter counter;
+  typedef utf16_writer writer;
+  typedef utf16_decoder<opt_false> decoder;
+};
+
+// 4-byte wchar_t: UTF-32, decoded without byte swapping
+template <> struct wchar_selector<4> {
+  typedef uint32_t type;
+  typedef utf32_counter counter;
+  typedef utf32_writer writer;
+  typedef utf32_decoder<opt_false> decoder;
+};
+
+// counter and writer traits for the platform's wchar_t
+typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+
+// Decoder for wchar_t input: forwards to the UTF-16 or UTF-32 decoder chosen
+// by wchar_selector for sizeof(wchar_t).
+struct wchar_decoder {
+  typedef wchar_t type;
+
+  template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) {
+    typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
+
+    // reinterpret the wchar_t buffer as the same-sized unsigned unit type the decoder expects
+    return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
+  }
+};
+
+#ifdef PUGIXML_WCHAR_MODE
+// Writes the byte-swapped form of each of the `length` characters in `data`
+// to `result`. Each element is read before the same index is written, so
+// callers may pass the same buffer for both (convert_buffer_endian_swap does).
+PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+{
+  for (size_t i = 0; i < length; ++i)
+    result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
+}
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+// Bit flags stored per character in chartype_table; each comment lists the
+// characters that carry the flag.
+enum chartype_t {
+  ct_parse_pcdata = 1,	// \0, &, \r, <
+  ct_parse_attr = 2,		// \0, &, \r, ', "
+  ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
+  ct_space = 8,			// \r, \n, space, tab
+  ct_parse_cdata = 16,	// \0, ], >, \r
+  ct_parse_comment = 32,	// \0, -, >, \r
+  ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+  ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
+};
+
+// Character classification table indexed by character code; each entry is a
+// bitwise OR of chartype_t flags (e.g. 192 = ct_symbol | ct_start_symbol).
+static const unsigned char chartype_table[256] = {
+  55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
+  0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
+  8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
+  64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
+  0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
+  0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
+
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
+  192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
+};
+
+// Bit flags stored per character in chartypex_table; each comment lists the
+// characters that carry the flag.
+enum chartypex_t {
+  ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+  ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+  ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
+  ctx_digit = 8,			  // 0-9
+  ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+};
+
+// Character classification table indexed by character code; each entry is a
+// bitwise OR of chartypex_t flags (e.g. 24 = ctx_digit | ctx_symbol,
+// 20 = ctx_start_symbol | ctx_symbol).
+static const unsigned char chartypex_table[256] = {
+  3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
+  3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
+  0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
+  24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
+
+  0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
+  0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
+
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
+  20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
+};
+
+// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields the bits of `ct` that are set for
+// character `c` in `table` (non-zero when the character belongs to the class).
+// In wchar mode every character code >= 128 shares table entry 128; otherwise
+// the character is converted to unsigned char and used as the index directly.
+#ifdef PUGIXML_WCHAR_MODE
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#else
+#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#endif
+
+// Lookups against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags).
+#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+// Reports whether the lowest-addressed byte of an unsigned int holds its least
+// significant bits.
+PUGI__FN bool is_little_endian()
+{
+  unsigned int probe = 1;
+  const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
+
+  return first_byte[0] == 1;
+}
+
+// Returns the concrete UTF encoding that corresponds to wchar_t on this
+// platform: UTF-16 for a 2-byte wchar_t, UTF-32 for a 4-byte one, with the
+// byte order reported by is_little_endian().
+PUGI__FN xml_encoding get_wchar_encoding()
+{
+  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+  const bool little = is_little_endian();
+
+  if (sizeof(wchar_t) == 2)
+    return little ? encoding_utf16_le : encoding_utf16_be;
+
+  return little ? encoding_utf32_le : encoding_utf32_be;
+}
+
+// Guesses the encoding of a document from its first four bytes d0..d3.
+// Checks run in order and the first match wins: byte order marks, then the
+// byte patterns of '<' / '<?' / '<?xm' in each encoding, then a lone UTF-16
+// '<'. UTF-8 is the fallback.
+PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+{
+  // look for BOM in first few bytes
+  // (the UTF-32 LE BOM starts with the UTF-16 LE BOM, so UTF-32 is tested first)
+  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+  if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+  if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+  // look for <, <? or <?xm in various encodings
+  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
+  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
+  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
+  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
+  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
+
+  // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
+  if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
+  if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
+
+  // no known BOM detected, assume utf8
+  return encoding_utf8;
+}
+
+// Resolves a requested encoding to a concrete one for the buffer `contents`
+// of `size` bytes. Generic requests (wchar, utf16, utf32) are mapped to the
+// platform-specific variant, explicit encodings are returned unchanged, and
+// encoding_auto is resolved by inspecting the first four bytes.
+PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+{
+  // replace wchar encoding with utf implementation
+  if (encoding == encoding_wchar) return get_wchar_encoding();
+
+  // replace utf16 encoding with utf16 with specific endianness
+  if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+  // replace utf32 encoding with utf32 with specific endianness
+  if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+  // only do autodetection if no explicit encoding is requested
+  if (encoding != encoding_auto) return encoding;
+
+  // skip encoding autodetection if input buffer is too small
+  if (size < 4) return encoding_utf8;
+
+  // try to guess encoding (based on XML specification, Appendix F.1)
+  const uint8_t* data = static_cast<const uint8_t*>(contents);
+
+  PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+
+  return guess_buffer_encoding(d0, d1, d2, d3);
+}
+
+// Produces a writable char_t buffer for contents that need no conversion.
+//
+// When is_mutable is set the input memory is handed back as-is and out_length is
+// the number of char_t units in it. Otherwise a zero-terminated copy is allocated
+// and out_length also counts the terminator. Returns false on allocation failure.
+PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+  const size_t units = size / sizeof(char_t);
+
+  if (is_mutable) {
+    // caller allows in-place modification: reuse the memory directly
+    out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+    out_length = units;
+
+    return true;
+  }
+
+  char_t* copy = static_cast<char_t*>(xml_memory::allocate((units + 1) * sizeof(char_t)));
+  if (!copy) return false;
+
+  if (contents)
+    memcpy(copy, contents, units * sizeof(char_t));
+  else
+    assert(units == 0);
+
+  copy[units] = 0;
+
+  out_buffer = copy;
+  out_length = units + 1;
+
+  return true;
+}
+
+#ifdef PUGIXML_WCHAR_MODE
+// Returns true when le and re are the same UTF-16 or UTF-32 encoding with opposite byte order.
+PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+{
+  switch (le) {
+  case encoding_utf16_be:
+    return re == encoding_utf16_le;
+  case encoding_utf16_le:
+    return re == encoding_utf16_be;
+  case encoding_utf32_be:
+    return re == encoding_utf32_le;
+  case encoding_utf32_le:
+    return re == encoding_utf32_be;
+  default:
+    return false;
+  }
+}
+
+// Produces a char_t buffer whose units are the byte-swapped units of contents.
+//
+// A mutable input is swapped in place (out_length = unit count); otherwise a
+// zero-terminated copy is allocated (out_length also counts the terminator).
+// Returns false on allocation failure.
+PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+  const size_t units = size / sizeof(char_t);
+  const char_t* input = static_cast<const char_t*>(contents);
+
+  if (is_mutable) {
+    char_t* in_place = const_cast<char_t*>(input);
+
+    convert_wchar_endian_swap(in_place, input, units);
+
+    out_buffer = in_place;
+    out_length = units;
+
+    return true;
+  }
+
+  char_t* swapped = static_cast<char_t*>(xml_memory::allocate((units + 1) * sizeof(char_t)));
+  if (!swapped) return false;
+
+  convert_wchar_endian_swap(swapped, input, units);
+  swapped[units] = 0;
+
+  out_buffer = swapped;
+  out_length = units + 1;
+
+  return true;
+}
+
+// Decodes contents with decoder D into a freshly allocated, zero-terminated wchar_t buffer.
+//
+// Works in two passes: the first counts the output units, the second writes them.
+// out_length includes the terminator. Returns false on allocation failure.
+template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+{
+  typedef typename D::type unit_t;
+
+  const unit_t* input = static_cast<const unit_t*>(contents);
+  const size_t input_units = size / sizeof(unit_t);
+
+  // pass 1: measure the output in wchar_t units
+  const size_t wide_units = D::process(input, input_units, 0, wchar_counter());
+
+  // room for the converted text plus the terminator
+  char_t* converted = static_cast<char_t*>(xml_memory::allocate((wide_units + 1) * sizeof(char_t)));
+  if (!converted) return false;
+
+  // pass 2: decode the input and emit wchar_t units
+  wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(converted);
+  wchar_writer::value_type out_last = D::process(input, input_units, out_first, wchar_writer());
+
+  assert(out_last == out_first + wide_units);
+  *out_last = 0;
+
+  out_buffer = converted;
+  out_length = wide_units + 1;
+
+  return true;
+}
+
+// Converts contents from the given source encoding into a wchar_t buffer (wchar mode).
+//
+// Input already in the native wchar encoding is passed through, input that only
+// differs in byte order is swapped, and everything else goes through the matching
+// decoder. Returns false on allocation failure or for an unsupported encoding.
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+  const xml_encoding native_wchar = get_wchar_encoding();
+
+  // fast path: already in the native representation
+  if (encoding == native_wchar)
+    return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+  // same encoding family, opposite byte order
+  if (need_endian_swap_utf(encoding, native_wchar))
+    return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+  switch (encoding) {
+  case encoding_utf8:
+    return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
+
+  case encoding_utf16_be:
+  case encoding_utf16_le: {
+    const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    // opt_true selects the byte-swapping decoder
+    if (native_utf16 == encoding)
+      return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
+
+    return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
+  }
+
+  case encoding_utf32_be:
+  case encoding_utf32_le: {
+    const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    if (native_utf32 == encoding)
+      return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
+
+    return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
+  }
+
+  case encoding_latin1:
+    return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
+
+  default:
+    break;
+  }
+
+  assert(!"Invalid encoding");
+  return false;
+}
+#else
+// Decodes contents with decoder D into a freshly allocated, zero-terminated UTF-8 buffer.
+//
+// Works in two passes: the first counts the output bytes, the second writes them.
+// out_length includes the terminator. Returns false on allocation failure.
+template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+{
+  typedef typename D::type unit_t;
+
+  const unit_t* input = static_cast<const unit_t*>(contents);
+  const size_t input_units = size / sizeof(unit_t);
+
+  // pass 1: measure the output in utf8 units
+  const size_t utf8_units = D::process(input, input_units, 0, utf8_counter());
+
+  // room for the converted text plus the terminator
+  char_t* converted = static_cast<char_t*>(xml_memory::allocate((utf8_units + 1) * sizeof(char_t)));
+  if (!converted) return false;
+
+  // pass 2: decode the input and emit utf8
+  uint8_t* out_first = reinterpret_cast<uint8_t*>(converted);
+  uint8_t* out_last = D::process(input, input_units, out_first, utf8_writer());
+
+  assert(out_last == out_first + utf8_units);
+  *out_last = 0;
+
+  out_buffer = converted;
+  out_length = utf8_units + 1;
+
+  return true;
+}
+
+// Returns the number of leading bytes in data that are <= 127, i.e. the index of
+// the first byte above 127, or size when there is none.
+PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+{
+  size_t prefix = 0;
+
+  while (prefix < size && data[prefix] <= 127)
+    ++prefix;
+
+  return prefix;
+}
+
+// Converts latin1 contents into a UTF-8 buffer.
+//
+// The leading run of bytes <= 127 is copied verbatim; only the remainder goes
+// through the latin1 decoder. If the whole input is such a run, the call is
+// forwarded to get_mutable_buffer. Returns false on allocation failure.
+PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+{
+  const uint8_t* input = static_cast<const uint8_t*>(contents);
+  const size_t input_size = size;
+
+  // split the input into a 7-bit head and a tail that needs conversion
+  const size_t head_size = get_latin1_7bit_prefix_length(input, input_size);
+  assert(head_size <= input_size);
+
+  const uint8_t* tail = input + head_size;
+  const size_t tail_size = input_size - head_size;
+
+  // nothing to convert: the input can be used as-is
+  if (tail_size == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+  // pass 1: total output size in utf8 units
+  const size_t utf8_units = head_size + latin1_decoder::process(tail, tail_size, 0, utf8_counter());
+
+  char_t* converted = static_cast<char_t*>(xml_memory::allocate((utf8_units + 1) * sizeof(char_t)));
+  if (!converted) return false;
+
+  // pass 2: copy the head, then decode the tail right behind it
+  memcpy(converted, input, head_size);
+
+  uint8_t* out_first = reinterpret_cast<uint8_t*>(converted);
+  uint8_t* out_last = latin1_decoder::process(tail, tail_size, out_first + head_size, utf8_writer());
+
+  assert(out_last == out_first + utf8_units);
+  *out_last = 0;
+
+  out_buffer = converted;
+  out_length = utf8_units + 1;
+
+  return true;
+}
+
+// Converts contents from the given source encoding into a UTF-8 buffer (non-wchar mode).
+//
+// UTF-8 input is passed through without conversion; UTF-16/UTF-32 input is decoded
+// with or without byte swapping depending on the host byte order; latin1 has its
+// own path. Returns false on allocation failure or for an unsupported encoding.
+PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+{
+  switch (encoding) {
+  case encoding_utf8:
+    // fast path: no conversion required
+    return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+  case encoding_utf16_be:
+  case encoding_utf16_le: {
+    const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    // opt_true selects the byte-swapping decoder
+    if (native_utf16 == encoding)
+      return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>());
+
+    return convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
+  }
+
+  case encoding_utf32_be:
+  case encoding_utf32_le: {
+    const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    if (native_utf32 == encoding)
+      return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>());
+
+    return convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
+  }
+
+  case encoding_latin1:
+    return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+  default:
+    break;
+  }
+
+  assert(!"Invalid encoding");
+  return false;
+}
+#endif
+
+// First half of the wide -> UTF-8 conversion: returns the number of UTF-8 units
+// needed to hold the first length units of str.
+PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+{
+  // counting pass only: no output pointer is supplied, the counter sink tallies units
+  const size_t utf8_units = wchar_decoder::process(str, length, 0, utf8_counter());
+
+  return utf8_units;
+}
+
+// Second half of the wide -> UTF-8 conversion: writes the UTF-8 form of
+// str[0, length) into buffer. size is the unit count previously obtained from
+// as_utf8_begin and is only used to check the amount written.
+PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+{
+  uint8_t* out_first = reinterpret_cast<uint8_t*>(buffer);
+  uint8_t* out_last = wchar_decoder::process(str, length, out_first, utf8_writer());
+
+  assert(out_first + size == out_last);
+
+  // keep both names "used" when assert compiles to nothing
+  (void)!out_last;
+  (void)!size;
+}
+
+#ifndef PUGIXML_NO_STL
+// Converts str[0, length) from wide characters to a UTF-8 std::string.
+PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+{
+  // pass 1: measure
+  const size_t utf8_size = as_utf8_begin(str, length);
+
+  // storage of exactly that size, zero-filled
+  std::string utf8(utf8_size, '\0');
+
+  // pass 2: fill (skipped for empty output so &utf8[0] is never taken on an empty string)
+  if (utf8_size != 0) as_utf8_end(&utf8[0], utf8_size, str, length);
+
+  return utf8;
+}
+
+// Converts size bytes of UTF-8 text at str into a wide string.
+PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
+{
+  const uint8_t* input = reinterpret_cast<const uint8_t*>(str);
+
+  // pass 1: measure the output in wchar_t units
+  const size_t wide_units = utf8_decoder::process(input, size, 0, wchar_counter());
+
+  // storage of exactly that size, zero-filled
+  std::basic_string<wchar_t> wide(wide_units, L'\0');
+
+  // pass 2: decode into the string storage (skipped for empty output)
+  if (wide_units != 0) {
+    wchar_writer::value_type out_first = reinterpret_cast<wchar_writer::value_type>(&wide[0]);
+    wchar_writer::value_type out_last = utf8_decoder::process(input, size, out_first, wchar_writer());
+
+    assert(out_first + wide_units == out_last);
+    (void)!out_last;
+  }
+
+  return wide;
+}
+#endif
+
+// Decides whether a string of `length` units may be written over the existing
+// string at `target` instead of allocating new storage.
+//
+// Shared contents are never overwritten. Storage not flagged by header_mask is
+// reused whenever the new string fits. Storage flagged by header_mask is reused
+// only if the new string fits and either the old string is shorter than 32 units
+// or less than half of it would be left unused.
+template <typename Header>
+inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
+{
+  // never reuse shared memory
+  if (header & xml_memory_page_contents_shared_mask) return false;
+
+  const size_t capacity = strlength(target);
+
+  // the new string must fit in every remaining case
+  if (capacity < length) return false;
+
+  // not flagged as separately allocated: always reuse
+  if ((header & header_mask) == 0) return true;
+
+  // flagged as allocated: reuse only if the waste is not too great
+  const size_t reuse_threshold = 32;
+
+  if (capacity < reuse_threshold) return true;
+
+  return capacity - length < capacity / 2;
+}
+
+// Stores a copy of source[0, source_length) in the string slot `dest`.
+//
+// `header & header_mask` being non-zero marks `dest` as memory obtained from the
+// page allocator; the flag is kept in sync with what `dest` ends up pointing at.
+//
+//  - source_length == 0: the slot is cleared (dest = 0); allocator-owned memory is released.
+//  - the existing buffer is acceptable per strcpy_insitu_allow: it is overwritten in place.
+//  - otherwise: a new string is allocated, filled, and only then is the old one released.
+//
+// Returns false if the allocator could not reserve or allocate memory; in that
+// case `dest` and `header` are left untouched. Returns true otherwise.
+template <typename String, typename Header>
+PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
+{
+  if (source_length == 0) {
+    // empty string and null pointer are equivalent, so just deallocate old memory
+    xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+    if (header & header_mask) alloc->deallocate_string(dest);
+
+    // mark the string as not allocated
+    dest = 0;
+    header &= ~header_mask;
+
+    return true;
+  } else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) {
+    // we can reuse old buffer, so just copy the new data (including zero terminator).
+    // memmove rather than memcpy: source may point into the very buffer being
+    // overwritten (e.g. assigning a suffix of the current value), and memcpy has
+    // undefined behavior for overlapping ranges.
+    memmove(dest, source, source_length * sizeof(char_t));
+    dest[source_length] = 0;
+
+    return true;
+  } else {
+    xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+    if (!alloc->reserve()) return false;
+
+    // allocate new buffer
+    char_t* buf = alloc->allocate_string(source_length + 1);
+    if (!buf) return false;
+
+    // copy the string (including zero terminator); buf is fresh memory, so it cannot overlap source
+    memcpy(buf, source, source_length * sizeof(char_t));
+    buf[source_length] = 0;
+
+    // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
+    if (header & header_mask) alloc->deallocate_string(dest);
+
+    // the string is now allocated, so set the flag
+    dest = buf;
+    header |= header_mask;
+
+    return true;
+  }
+}
+
+// Bookkeeping for deleting characters from a string while scanning it in place.
+//
+// Removed ranges are not closed immediately; instead the struct remembers where
+// the most recent removed range ends (`end`) and how many units have been
+// removed in total (`size`). The text between two removed ranges is shifted
+// left lazily, when the next range is pushed or when flush() is called.
+struct gap {
+  char_t* end;
+  size_t size;
+
+  gap(): end(0), size(0) {
+  }
+
+  // Registers [s, s + count) as removed and advances s past it.
+  // Text kept since the previous removed range is first moved down over the hole.
+  void push(char_t*& s, size_t count) {
+    if (end) {
+      assert(s >= end);
+
+      // shift [end, s) left so that it starts where the accumulated hole starts
+      char_t* hole_start = end - size;
+      const size_t kept_bytes = static_cast<size_t>(reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+
+      memmove(hole_start, end, kept_bytes);
+    }
+
+    // step over the newly removed range and fold it into the accumulated hole
+    s += count;
+    size += count;
+    end = s;
+  }
+
+  // Closes the accumulated hole for text up to s and returns the new past-the-end
+  // pointer of the compacted text (s itself if nothing was removed).
+  char_t* flush(char_t* s) {
+    if (!end) return s;
+
+    assert(s >= end);
+
+    char_t* hole_start = end - size;
+    const size_t kept_bytes = static_cast<size_t>(reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
+
+    memmove(hole_start, end, kept_bytes);
+
+    return s - size;
+  }
+};
+
+// Expands one character/entity reference in place.
+//
+// s is expected to point at the '&' that starts the reference (callers in this
+// file invoke it only when *s == '&'). Recognized forms are numeric references
+// (&#NNN; and &#xHHH;) and the five predefined entities &amp; &apos; &gt; &lt; &quot;.
+//
+// On success the replacement is written at s, the rest of the reference is
+// registered with g as a removed range, and the pointer just past the ';' is returned.
+// If the reference is malformed or unknown, the buffer is left unmodified and the
+// position where matching stopped is returned, so the caller resumes scanning there.
+PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+{
+  // stre scans ahead through the reference while s stays on the '&'
+  char_t* stre = s + 1;
+
+  switch (*stre) {
+  case '#': {	// &#...
+    // code point accumulated from the digits
+    unsigned int ucsc = 0;
+
+    if (stre[1] == 'x') { // &#x... (hex code)
+      stre += 2;
+
+      char_t ch = *stre;
+
+      // "&#x;" carries no digits: leave it as-is
+      if (ch == ';') return stre;
+
+      for (;;) {
+        if (static_cast<unsigned int>(ch - '0') <= 9)
+          ucsc = 16 * ucsc + (ch - '0');
+        // (ch | ' ') folds ASCII upper case onto lower case, so this accepts a-f and A-F
+        else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
+          ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+        else if (ch == ';')
+          break;
+        else // cancel
+          return stre;
+
+        ch = *++stre;
+      }
+
+      // step over ';'
+      ++stre;
+    } else {	// &#... (dec code)
+      char_t ch = *++stre;
+
+      // "&#;" carries no digits: leave it as-is
+      if (ch == ';') return stre;
+
+      for (;;) {
+        if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
+          ucsc = 10 * ucsc + (ch - '0');
+        else if (ch == ';')
+          break;
+        else // cancel
+          return stre;
+
+        ch = *++stre;
+      }
+
+      // step over ';'
+      ++stre;
+    }
+
+    // write the encoded code point over the start of the reference; s ends up just past it
+#ifdef PUGIXML_WCHAR_MODE
+    s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
+#else
+    s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
+#endif
+
+    // whatever remains of the reference text between s and stre is now a gap
+    g.push(s, stre - s);
+    return stre;
+  }
+
+  case 'a': {	// &a
+    ++stre;
+
+    if (*stre == 'm') { // &am
+      if (*++stre == 'p' && *++stre == ';') { // &amp;
+        *s++ = '&';
+        ++stre;
+
+        g.push(s, stre - s);
+        return stre;
+      }
+    } else if (*stre == 'p') { // &ap
+      if (*++stre == 'o' && *++stre == 's' && *++stre == ';') { // &apos;
+        *s++ = '\'';
+        ++stre;
+
+        g.push(s, stre - s);
+        return stre;
+      }
+    }
+    break;
+  }
+
+  case 'g': { // &g
+    if (*++stre == 't' && *++stre == ';') { // &gt;
+      *s++ = '>';
+      ++stre;
+
+      g.push(s, stre - s);
+      return stre;
+    }
+    break;
+  }
+
+  case 'l': { // &l
+    if (*++stre == 't' && *++stre == ';') { // &lt;
+      *s++ = '<';
+      ++stre;
+
+      g.push(s, stre - s);
+      return stre;
+    }
+    break;
+  }
+
+  case 'q': { // &q
+    if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') { // &quot;
+      *s++ = '"';
+      ++stre;
+
+      g.push(s, stre - s);
+      return stre;
+    }
+    break;
+  }
+
+  default:
+    break;
+  }
+
+  // not a recognized reference: nothing was modified, resume where matching stopped
+  return stre;
+}
+
+// Parser utilities
+//
+// These macros are textual shortcuts for the parsing routines below. They refer to
+// names that must exist at the point of use: `s` (current position), and depending
+// on the macro `endch`, `optmsk`, `cursor`, `alloc`, `ch`, `error_offset`, `error_status`.
+
+// True if c equals e, or if c is the zero character and endch equals e.
+#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
+// Advances s past characters classified as ct_space.
+#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+// Non-zero if any bit of OPT is set in optmsk.
+#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
+// Appends a new node of TYPE under cursor and makes it the cursor; reports out-of-memory on failure.
+#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+// Moves cursor to its parent.
+#define PUGI__POPNODE()             { cursor = cursor->parent; }
+// Advances s until condition X holds or the zero character is reached.
+#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
+// Advances s while condition X holds (no check for the zero character).
+#define PUGI__SCANWHILE(X)          { while (X) ++s; }
+// Same effect as PUGI__SCANWHILE, examining four characters per loop iteration; X must test `ss`, not `*s`.
+#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
+// Saves the current character in ch, overwrites it with the zero terminator and steps past it.
+#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
+// Records the error position and status, then returns a null pointer from the enclosing function.
+#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
+// Raises err if s is at the zero character.
+#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+// Post-processes comment text in place, starting at s.
+//
+// Every "\r\n" pair and every lone '\r' becomes a single '\n'. When "--" followed
+// by '>' is found (or "--" followed by the zero character while endch is '>'),
+// the text is zero-terminated and the position just past the terminator
+// sequence is returned. Returns 0 if the zero character is reached first.
+PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+{
+  gap g;
+
+  while (true) {
+    // skip characters that need no attention here
+    PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
+
+    if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+      *s++ = '\n'; // replace first one with 0x0a
+
+      // the '\n' of a pair is dropped by registering it as a gap
+      if (*s == '\n') g.push(s, 1);
+    } else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) { // comment ends here
+      // close pending gaps and terminate the (possibly shortened) text
+      *g.flush(s) = 0;
+
+      // skip "-->" or, when the '>' is represented by endch, just "--"
+      return s + (s[2] == '>' ? 3 : 2);
+    } else if (*s == 0) {
+      // ran out of input before the terminator
+      return 0;
+    } else ++s;
+  }
+}
+
+// Post-processes CDATA text in place, starting at s.
+//
+// Every "\r\n" pair and every lone '\r' becomes a single '\n'. When "]]" followed
+// by '>' is found (or "]]" followed by the zero character while endch is '>'),
+// the text is zero-terminated and s + 1 is returned, i.e. the position of the
+// second ']'; the caller is left to step over the remaining "]>".
+// Returns 0 if the zero character is reached first.
+PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+{
+  gap g;
+
+  while (true) {
+    // skip characters that need no attention here
+    PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
+
+    if (*s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+      *s++ = '\n'; // replace first one with 0x0a
+
+      // the '\n' of a pair is dropped by registering it as a gap
+      if (*s == '\n') g.push(s, 1);
+    } else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) { // CDATA ends here
+      // close pending gaps and terminate the (possibly shortened) text
+      *g.flush(s) = 0;
+
+      return s + 1;
+    } else if (*s == 0) {
+      // ran out of input before the terminator
+      return 0;
+    } else ++s;
+  }
+}
+
+// Signature of a PCDATA post-processing routine: takes the start of the text,
+// returns the position at which parsing continues.
+typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+// PCDATA post-processor, specialized at compile time by three switches:
+//   opt_trim   - strip trailing whitespace from the resulting text
+//   opt_eol    - turn "\r\n" and lone '\r' into '\n'
+//   opt_escape - expand character/entity references via strconv_escape
+template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl {
+  // Processes text in place starting at s and zero-terminates it.
+  // Stops at '<' (returns the position just past it) or at the zero character
+  // (returns its position).
+  static char_t* parse(char_t* s) {
+    gap g;
+
+    // trimming never moves before the start of the text
+    char_t* begin = s;
+
+    while (true) {
+      // skip characters that need no attention here
+      PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
+
+      if (*s == '<') { // PCDATA ends here
+        char_t* end = g.flush(s);
+
+        if (opt_trim::value)
+          while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+            --end;
+
+        *end = 0;
+
+        return s + 1;
+      } else if (opt_eol::value && *s == '\r') { // Either a single 0x0d or 0x0d 0x0a pair
+        *s++ = '\n'; // replace first one with 0x0a
+
+        // the '\n' of a pair is dropped by registering it as a gap
+        if (*s == '\n') g.push(s, 1);
+      } else if (opt_escape::value && *s == '&') {
+        s = strconv_escape(s, g);
+      } else if (*s == 0) {
+        // end of input: same finalization as for '<', but s stays on the zero character
+        char_t* end = g.flush(s);
+
+        if (opt_trim::value)
+          while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+            --end;
+
+        *end = 0;
+
+        return s;
+      } else ++s;
+    }
+  }
+};
+
+// Selects the PCDATA post-processing routine matching the parse options.
+//
+// Only parse_escapes, parse_eol and parse_trim_pcdata influence the choice.
+PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+{
+  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+  // one entry per flag combination; index bits are (trim eol escapes), matching the
+  // template parameter order <opt_trim, opt_eol, opt_escape>
+  static const strconv_pcdata_t handlers[8] = {
+    strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse,
+    strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse,
+    strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse,
+    strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse,
+    strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse,
+    strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse,
+    strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse,
+    strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse
+  };
+
+  // bits 4-5 (escapes, eol) land in bits 0-1, bit 11 (trim) lands in bit 2; result is always 0..7
+  const unsigned int selector = ((optmask >> 4) & 3) | ((optmask >> 9) & 4);
+
+  return handlers[selector];
+}
+
+// Signature of an attribute value post-processing routine: takes the start of
+// the value and the quote character that ends it.
+typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+// Attribute value post-processors. All four variants work in place, zero-terminate
+// the value at the closing quote and return the position just past that quote, or
+// return 0 if the zero character is reached before the quote.
+// opt_escape selects whether character/entity references are expanded.
+template <typename opt_escape> struct strconv_attribute_impl {
+  // Whitespace normalization: leading and trailing whitespace is removed and every
+  // run of whitespace inside the value becomes a single space.
+  static char_t* parse_wnorm(char_t* s, char_t end_quote) {
+    gap g;
+
+    // trim leading whitespaces
+    if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+      char_t* str = s;
+
+      do ++str;
+      while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+      g.push(s, str - s);
+    }
+
+    while (true) {
+      PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
+
+      if (*s == end_quote) {
+        char_t* str = g.flush(s);
+
+        // terminate, then keep zeroing backwards over trailing whitespace
+        do *str-- = 0;
+        while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+        return s + 1;
+      } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+        // first whitespace character of a run becomes a space...
+        *s++ = ' ';
+
+        // ...and the rest of the run is removed
+        if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+          char_t* str = s + 1;
+          while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+          g.push(s, str - s);
+        }
+      } else if (opt_escape::value && *s == '&') {
+        s = strconv_escape(s, g);
+      } else if (!*s) {
+        return 0;
+      } else ++s;
+    }
+  }
+
+  // Whitespace conversion: every whitespace character becomes a space; a "\r\n"
+  // pair becomes one space.
+  static char_t* parse_wconv(char_t* s, char_t end_quote) {
+    gap g;
+
+    while (true) {
+      PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
+
+      if (*s == end_quote) {
+        *g.flush(s) = 0;
+
+        return s + 1;
+      } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
+        if (*s == '\r') {
+          *s++ = ' ';
+
+          // the '\n' of a pair is dropped by registering it as a gap
+          if (*s == '\n') g.push(s, 1);
+        } else *s++ = ' ';
+      } else if (opt_escape::value && *s == '&') {
+        s = strconv_escape(s, g);
+      } else if (!*s) {
+        return 0;
+      } else ++s;
+    }
+  }
+
+  // End-of-line normalization: "\r\n" and lone '\r' become '\n'.
+  static char_t* parse_eol(char_t* s, char_t end_quote) {
+    gap g;
+
+    while (true) {
+      PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+      if (*s == end_quote) {
+        *g.flush(s) = 0;
+
+        return s + 1;
+      } else if (*s == '\r') {
+        *s++ = '\n';
+
+        // the '\n' of a pair is dropped by registering it as a gap
+        if (*s == '\n') g.push(s, 1);
+      } else if (opt_escape::value && *s == '&') {
+        s = strconv_escape(s, g);
+      } else if (!*s) {
+        return 0;
+      } else ++s;
+    }
+  }
+
+  // No whitespace or end-of-line processing; only reference expansion when opt_escape is set.
+  static char_t* parse_simple(char_t* s, char_t end_quote) {
+    gap g;
+
+    while (true) {
+      PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+      if (*s == end_quote) {
+        *g.flush(s) = 0;
+
+        return s + 1;
+      } else if (opt_escape::value && *s == '&') {
+        s = strconv_escape(s, g);
+      } else if (!*s) {
+        return 0;
+      } else ++s;
+    }
+  }
+};
+
+// Selects the attribute value post-processing routine matching the parse options.
+//
+// The whitespace mode is chosen by priority: parse_wnorm_attribute beats
+// parse_wconv_attribute, which beats parse_eol; with none of them set the simple
+// routine is used. parse_escapes independently picks the escaping variant.
+PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+{
+  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+  const bool escapes = (optmask & parse_escapes) != 0;
+
+  if (optmask & parse_wnorm_attribute) {
+    if (escapes) return strconv_attribute_impl<opt_true>::parse_wnorm;
+
+    return strconv_attribute_impl<opt_false>::parse_wnorm;
+  }
+
+  if (optmask & parse_wconv_attribute) {
+    if (escapes) return strconv_attribute_impl<opt_true>::parse_wconv;
+
+    return strconv_attribute_impl<opt_false>::parse_wconv;
+  }
+
+  if (optmask & parse_eol) {
+    if (escapes) return strconv_attribute_impl<opt_true>::parse_eol;
+
+    return strconv_attribute_impl<opt_false>::parse_eol;
+  }
+
+  if (escapes) return strconv_attribute_impl<opt_true>::parse_simple;
+
+  return strconv_attribute_impl<opt_false>::parse_simple;
+}
+
+// Builds an xml_parse_result carrying the given status and offset (offset defaults to 0).
+inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+{
+  xml_parse_result outcome;
+
+  outcome.offset = offset;
+  outcome.status = status;
+
+  return outcome;
+}
+
+struct xml_parser {
+  xml_allocator alloc;
+  xml_allocator* alloc_state;
+  char_t* error_offset;
+  xml_parse_status error_status;
+
+  // Takes a working copy of *alloc_ and remembers the pointer so the copy can be
+  // written back later; starts with no error recorded.
+  xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) {
+  }
+
+  // Writes the parser's working allocator copy back into the allocator object
+  // that was passed to the constructor.
+  ~xml_parser() {
+    *alloc_state = alloc;
+  }
+
+  // DOCTYPE consists of nested sections of the following possible types:
+  // <!-- ... -->, <? ... ?>, "...", '...'
+  // <![...]]>
+  // <!...>
+  // First group can not contain nested groups
+  // Second group can contain nested groups of the same type
+  // Third group can contain all other groups
+  // Skips one non-nesting DOCTYPE construct starting at s: a quoted string,
+  // a <? ... ?> section or a <!-- ... --> comment.
+  // Returns the position just past the construct; reports status_bad_doctype if
+  // s starts none of these or the construct is not closed before the zero character.
+  char_t* parse_doctype_primitive(char_t* s) {
+    if (*s == '"' || *s == '\'') {
+      // quoted string: runs up to the next occurrence of the same quote
+      const char_t quote = *s++;
+
+      PUGI__SCANFOR(*s == quote);
+      if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+      return s + 1;
+    }
+
+    if (s[0] == '<' && s[1] == '?') {
+      // <? ... ?>
+      s += 2;
+
+      PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+      if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+      return s + 2;
+    }
+
+    if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') {
+      // <!-- ... -->
+      s += 4;
+
+      PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+      if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+      return s + 3;
+    }
+
+    // anything else is not allowed here
+    PUGI__THROW_ERROR(status_bad_doctype, s);
+  }
+
+  // Skips a <![ ... ]]> section of a DOCTYPE, including any sections of the same
+  // kind nested inside it. s must point at the opening "<![".
+  // Returns the position just past the matching "]]>"; reports status_bad_doctype
+  // if the zero character is reached first.
+  char_t* parse_doctype_ignore(char_t* s) {
+    assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+    s += 3;
+
+    // number of nested sections currently open inside the outermost one
+    size_t nested = 0;
+
+    while (*s) {
+      if (s[0] == '<' && s[1] == '!' && s[2] == '[') {
+        // a nested section opens
+        s += 3;
+        ++nested;
+        continue;
+      }
+
+      if (!(s[0] == ']' && s[1] == ']' && s[2] == '>')) {
+        // ordinary character
+        ++s;
+        continue;
+      }
+
+      // a section closes
+      s += 3;
+
+      if (nested == 0)
+        return s;
+
+      --nested;
+    }
+
+    PUGI__THROW_ERROR(status_bad_doctype, s);
+  }
+
+  // Skips a <! ... > group of a DOCTYPE together with everything nested in it.
+  // s must point at the group's first character, with '!' as the second one (the
+  // first character is '<', or the zero character per the assert below).
+  //
+  // Returns the position of the '>' that closes the outermost group. If the zero
+  // character is reached instead, its position is returned provided no nested
+  // group is still open and endch is '>'; otherwise status_bad_doctype is reported.
+  // Errors from the nested helpers are propagated as a null return.
+  char_t* parse_doctype_group(char_t* s, char_t endch) {
+    // number of nested <! ... > groups currently open inside the outermost one
+    size_t depth = 0;
+
+    assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+    s += 2;
+
+    while (*s) {
+      if (s[0] == '<' && s[1] == '!' && s[2] != '-') {
+        if (s[2] == '[') {
+          // ignore
+          s = parse_doctype_ignore(s);
+          if (!s) return s;
+        } else {
+          // some control group
+          s += 2;
+          depth++;
+        }
+      } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') {
+        // unknown tag (forbidden), or some primitive group
+        s = parse_doctype_primitive(s);
+        if (!s) return s;
+      } else if (*s == '>') {
+        // closes either the outermost group (done) or a nested one
+        if (depth == 0)
+          return s;
+
+        depth--;
+        s++;
+      } else s++;
+    }
+
+    // input ended: acceptable only if all nested groups are closed and endch supplies the final '>'
+    if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+    return s;
+  }
+
+  // Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
+  // s points at the '!' on entry. Nodes are appended under cursor only when the
+  // corresponding option (parse_comments, parse_cdata, parse_doctype) is set in
+  // optmsk; otherwise the construct is just skipped.
+  //
+  // Returns the position at which parsing continues, or a null pointer after
+  // recording an error (bad comment / bad CDATA / bad DOCTYPE / unrecognized tag /
+  // out of memory) in error_status and error_offset.
+  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) {
+    // parse node contents, starting with exclamation mark
+    ++s;
+
+    if (*s == '-') { // '<!-...'
+      ++s;
+
+      if (*s == '-') { // '<!--...'
+        ++s;
+
+        if (PUGI__OPTSET(parse_comments)) {
+          PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+          cursor->value = s; // Save the offset.
+        }
+
+        if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) {
+          // the comment is kept and needs end-of-line normalization
+          s = strconv_comment(s, endch);
+
+          if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+        } else {
+          // Scan for terminating '-->'.
+          PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
+          PUGI__CHECK_ERROR(status_bad_comment, s);
+
+          if (PUGI__OPTSET(parse_comments))
+            *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+          s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+        }
+      } else PUGI__THROW_ERROR(status_bad_comment, s);
+    } else if (*s == '[') {
+      // '<![CDATA[...'
+      if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') {
+        ++s;
+
+        if (PUGI__OPTSET(parse_cdata)) {
+          PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+          cursor->value = s; // Save the offset.
+
+          if (PUGI__OPTSET(parse_eol)) {
+            s = strconv_cdata(s, endch);
+
+            if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+          } else {
+            // Scan for terminating ']]>'.
+            PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+            PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+            *s++ = 0; // Zero-terminate this segment.
+          }
+        } else { // Flagged for discard, but we still have to scan for the terminator.
+          // Scan for terminating ']]>'.
+          PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+          PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+          ++s;
+        }
+
+        // every path above leaves s on the second ']' of the terminator
+        s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+      } else PUGI__THROW_ERROR(status_bad_cdata, s);
+    } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) {
+      // step back by two, to the character before the '!', as parse_doctype_group expects
+      s -= 2;
+
+      // DOCTYPE is only accepted when cursor has no parent
+      if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+      // 9 == length of "<!DOCTYPE": mark is where the doctype text starts
+      char_t* mark = s + 9;
+
+      s = parse_doctype_group(s, endch);
+      if (!s) return s;
+
+      assert((*s == 0 && endch == '>') || *s == '>');
+      // terminate the doctype text at the closing '>' (if it is physically present) and step past it
+      if (*s) *s++ = 0;
+
+      if (PUGI__OPTSET(parse_doctype)) {
+        while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+        PUGI__PUSHNODE(node_doctype);
+
+        cursor->value = mark;
+      }
+    } else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+    else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+    else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+    return s;
+  }
+
+  // Parses a construct that begins with "<?": a processing instruction or the
+  // XML declaration. s points at the '?' on entry.
+  //
+  // A target spelled "xml" (in any letter case) is treated as a declaration and
+  // is governed by parse_declaration; every other target is governed by parse_pi.
+  // When the governing option is not set, the construct is skipped without
+  // creating a node. ref_cursor is updated on successful return; for a declaration
+  // with content it is left pointing at the new declaration node and the returned
+  // position is the start of that content.
+  //
+  // Returns the position at which parsing continues, or a null pointer after
+  // recording status_bad_pi or status_out_of_memory.
+  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) {
+    // load into registers
+    xml_node_struct* cursor = ref_cursor;
+    char_t ch = 0;
+
+    // parse node contents, starting with question mark
+    ++s;
+
+    // read PI target
+    char_t* target = s;
+
+    if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+    PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+    PUGI__CHECK_ERROR(status_bad_pi, s);
+
+    // determine node type; stricmp / strcasecmp is not portable
+    // ((c | ' ') maps ASCII upper case letters to lower case; the target must be exactly 3 characters long)
+    bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+    if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) {
+      if (declaration) {
+        // disallow non top-level declarations
+        if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+        PUGI__PUSHNODE(node_declaration);
+      } else {
+        PUGI__PUSHNODE(node_pi);
+      }
+
+      cursor->name = target;
+
+      // terminate the target name; ch receives the character that followed it
+      PUGI__ENDSEG();
+
+      // parse value/attributes
+      if (ch == '?') {
+        // empty node
+        if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+        s += (*s == '>');
+
+        PUGI__POPNODE();
+      } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+        PUGI__SKIPWS();
+
+        // scan for tag end
+        char_t* value = s;
+
+        PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+        PUGI__CHECK_ERROR(status_bad_pi, s);
+
+        if (declaration) {
+          // replace ending ? with / so that 'element' terminates properly
+          *s = '/';
+
+          // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+          s = value;
+        } else {
+          // store value and step over >
+          cursor->value = value;
+
+          PUGI__POPNODE();
+
+          // terminate the value at the '?'
+          PUGI__ENDSEG();
+
+          s += (*s == '>');
+        }
+      } else PUGI__THROW_ERROR(status_bad_pi, s);
+    } else {
+      // scan for tag end
+      PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+      PUGI__CHECK_ERROR(status_bad_pi, s);
+
+      // step over "?>" or, when the '>' is represented by endch, just "?"
+      s += (s[1] == '>' ? 2 : 1);
+    }
+
+    // store from registers
+    ref_cursor = cursor;
+
+    return s;
+  }
+
+  // Main parsing loop: walks the zero-terminated buffer starting at s and builds the node
+  // tree under root. cursor tracks the node currently being filled: element start tags
+  // push a node, end tags and '/>' pop it. '<?' and '<!' constructs are delegated to
+  // parse_question / parse_exclamation. endch is the original last character of the
+  // buffer, which the caller replaced with the terminator.
+  // Returns the final position; reports status_end_element_mismatch if the cursor has not
+  // returned to root when the input ends.
+  // NOTE(review): errors are reported through PUGI__THROW_ERROR, defined outside this chunk.
+  char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) {
+    strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+    strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+    char_t ch = 0;
+    xml_node_struct* cursor = root;
+    char_t* mark = s;
+
+    while (*s != 0) {
+      if (*s == '<') {
+        ++s;
+
+        // entered either from the '<' check above or by goto after PCDATA handling
+LOC_TAG:
+        if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // '<#...'
+          PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+          cursor->name = s;
+
+          PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+          PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+          if (ch == '>') {
+            // end of tag
+          } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+            // also entered by goto after parse_question produced a declaration node
+LOC_ATTRIBUTES:
+            while (true) {
+              PUGI__SKIPWS(); // Eat any whitespace.
+
+              if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) { // <... #...
+                xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
+                if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+                a->name = s; // Save the offset.
+
+                PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+                PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+                // allow whitespace between the attribute name and '='
+                if (PUGI__IS_CHARTYPE(ch, ct_space)) {
+                  PUGI__SKIPWS(); // Eat any whitespace.
+
+                  ch = *s;
+                  ++s;
+                }
+
+                if (ch == '=') { // '<... #=...'
+                  PUGI__SKIPWS(); // Eat any whitespace.
+
+                  if (*s == '"' || *s == '\'') { // '<... #="...'
+                    ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+                    ++s; // Step over the quote.
+                    a->value = s; // Save the offset.
+
+                    s = strconv_attribute(s, ch);
+
+                    if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+                    // After this line the loop continues from the start;
+                    // Whitespaces, / and > are ok, symbols and EOF are wrong,
+                    // everything else will be detected
+                    if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+                  } else PUGI__THROW_ERROR(status_bad_attribute, s);
+                } else PUGI__THROW_ERROR(status_bad_attribute, s);
+              } else if (*s == '/') {
+                ++s;
+
+                if (*s == '>') {
+                  PUGI__POPNODE();
+                  s++;
+                  break;
+                } else if (*s == 0 && endch == '>') {
+                  // the closing '>' was the buffer's last character (now held in endch)
+                  PUGI__POPNODE();
+                  break;
+                } else PUGI__THROW_ERROR(status_bad_start_element, s);
+              } else if (*s == '>') {
+                ++s;
+
+                break;
+              } else if (*s == 0 && endch == '>') {
+                break;
+              } else PUGI__THROW_ERROR(status_bad_start_element, s);
+            }
+
+            // !!!
+          } else if (ch == '/') { // '<#.../'
+            if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+            PUGI__POPNODE(); // Pop.
+
+            s += (*s == '>');
+          } else if (ch == 0) {
+            // we stepped over null terminator, backtrack & handle closing tag
+            --s;
+
+            if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+          } else PUGI__THROW_ERROR(status_bad_start_element, s);
+        } else if (*s == '/') {
+          ++s;
+
+          // closing tag: its name must match the name of the node we are currently inside
+          char_t* name = cursor->name;
+          if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+          while (PUGI__IS_CHARTYPE(*s, ct_symbol)) {
+            if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+          }
+
+          // the closing tag's name ended before the open element's name did
+          if (*name) {
+            if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+            else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+          }
+
+          PUGI__POPNODE(); // Pop.
+
+          PUGI__SKIPWS();
+
+          if (*s == 0) {
+            if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+          } else {
+            if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+            ++s;
+          }
+        } else if (*s == '?') { // '<?...'
+          s = parse_question(s, cursor, optmsk, endch);
+          if (!s) return s;
+
+          assert(cursor);
+          // a declaration node left as cursor means its attributes still have to be parsed
+          if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
+        } else if (*s == '!') { // '<!...'
+          s = parse_exclamation(s, cursor, optmsk, endch);
+          if (!s) return s;
+        } else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+        else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+      } else {
+        mark = s; // Save this offset while searching for a terminator.
+
+        PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+        if (*s == '<' || !*s) {
+          // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+          assert(mark != s);
+
+          // whitespace-only text is dropped unless one of the ws_pcdata options keeps it
+          if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) {
+            continue;
+          } else if (PUGI__OPTSET(parse_ws_pcdata_single)) {
+            // keep it only when it is the sole content directly before a closing tag
+            if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
+          }
+        }
+
+        // unless trimming, rewind so that the leading whitespace is part of the value
+        if (!PUGI__OPTSET(parse_trim_pcdata))
+          s = mark;
+
+        if (cursor->parent || PUGI__OPTSET(parse_fragment)) {
+          PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+          cursor->value = s; // Save the offset.
+
+          s = strconv_pcdata(s);
+
+          PUGI__POPNODE(); // Pop since this is a standalone.
+
+          if (!*s) break;
+        } else {
+          // text at root level outside fragment mode is skipped without creating a node
+          PUGI__SCANFOR(*s == '<'); // '...<'
+          if (!*s) break;
+
+          ++s;
+        }
+
+        // We're after '<'
+        goto LOC_TAG;
+      }
+    }
+
+    // check that last tag is closed
+    if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+    return s;
+  }
+
+#ifdef PUGIXML_WCHAR_MODE
+  // Return the position just past a leading byte order mark (U+FEFF), or s itself
+  // when the buffer does not start with one.
+  static char_t* parse_skip_bom(char_t* s) {
+    unsigned int bom = 0xfeff;
+
+    if (s[0] != static_cast<wchar_t>(bom)) return s;
+
+    return s + 1;
+  }
+#else
+  // Return the position just past a leading UTF-8 byte order mark (EF BB BF), or s
+  // itself when the buffer does not start with one.
+  static char_t* parse_skip_bom(char_t* s) {
+    // && short-circuits, so later bytes are only inspected after the earlier ones matched
+    const bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';
+
+    if (starts_with_bom) return s + 3;
+
+    return s;
+  }
+#endif
+
+  // Return true if node or any sibling that follows it is an element node.
+  // A null node yields false.
+  static bool has_element_node_siblings(xml_node_struct* node) {
+    for (; node; node = node->next_sibling)
+      if (PUGI__NODETYPE(node) == node_element)
+        return true;
+
+    return false;
+  }
+
+  // Parse buffer[0..length) in place into the tree under root and return the result
+  // (status plus an offset relative to buffer).
+  // The last character of the buffer is saved in endch and overwritten with a zero
+  // terminator, so the buffer must be writable; parse_tree receives endch so that it can
+  // account for the removed character.
+  // xmldoc is used as the allocator for the parser; optmsk is the parse option mask.
+  static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) {
+    // early-out for empty documents
+    if (length == 0)
+      return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
+
+    // get last child of the root before parsing
+    // (first_child->prev_sibling_c is read as the last child here, so nodes appended by
+    // this parse can be told apart from pre-existing ones below)
+    // NOTE(review): the '+ 0' presumably forces conversion to a raw pointer - confirm
+    xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
+
+    // create parser on stack
+    xml_parser parser(static_cast<xml_allocator*>(xmldoc));
+
+    // save last character and make buffer zero-terminated (speeds up parsing)
+    char_t endch = buffer[length - 1];
+    buffer[length - 1] = 0;
+
+    // skip BOM to make sure it does not end up as part of parse output
+    char_t* buffer_data = parse_skip_bom(buffer);
+
+    // perform actual parsing
+    // (the return value is ignored; the outcome is read from parser.error_status / error_offset)
+    parser.parse_tree(buffer_data, root, optmsk, endch);
+
+    xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+    assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
+
+    if (result) {
+      // since we removed last character, we have to handle the only possible false positive (stray <)
+      if (endch == '<')
+        return make_parse_result(status_unrecognized_tag, length - 1);
+
+      // check if there are any element nodes parsed
+      // (only nodes added by this call are inspected: those after last_root_child, or all
+      // children when the root had none before)
+      xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
+
+      if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
+        return make_parse_result(status_no_document_element, length - 1);
+    } else {
+      // roll back offset if it occurs on a null terminator in the source buffer
+      if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
+        result.offset--;
+    }
+
+    return result;
+  }
+};
+
+// Output facilities
+// Return the encoding that char_t data already has in this build, i.e. the one that
+// can be written without conversion: the wchar encoding in wchar mode, UTF-8 otherwise.
+PUGI__FN xml_encoding get_write_native_encoding()
+{
+#ifdef PUGIXML_WCHAR_MODE
+  const xml_encoding native = get_wchar_encoding();
+#else
+  const xml_encoding native = encoding_utf8;
+#endif
+
+  return native;
+}
+
+// Resolve a user-requested output encoding to a concrete one:
+// generic wchar / utf16 / utf32 requests are mapped to a specific variant, encoding_auto
+// becomes UTF-8, and every other value is returned unchanged.
+PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+{
+  switch (encoding) {
+  case encoding_wchar:
+    // replace wchar encoding with utf implementation
+    return get_wchar_encoding();
+
+  case encoding_utf16:
+    // pick the utf16 flavour matching this machine's endianness
+    return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+  case encoding_utf32:
+    // pick the utf32 flavour matching this machine's endianness
+    return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+  case encoding_auto:
+    // no explicit request: assume utf8
+    return encoding_utf8;
+
+  default:
+    // an explicit, already concrete encoding is honoured as is
+    return encoding;
+  }
+}
+
+// Convert length char_t units from data into dest using decoder D and writer T.
+// Returns the number of BYTES written to dest.
+template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
+{
+  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+  // the decoder consumes its own unit type, which has the same size as char_t (checked above)
+  const typename D::type* source = reinterpret_cast<const typename D::type*>(data);
+
+  typename T::value_type tail = D::process(source, length, dest, T());
+
+  const size_t units_written = static_cast<size_t>(tail - dest);
+
+  return units_written * sizeof(*dest);
+}
+
+// Convert length char_t units from data into dest using decoder D and writer T, then
+// byte-swap every written unit when opt_swap is set.
+// Returns the number of BYTES written to dest.
+template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
+{
+  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+  // the decoder consumes its own unit type, which has the same size as char_t (checked above)
+  const typename D::type* source = reinterpret_cast<const typename D::type*>(data);
+
+  typename T::value_type tail = D::process(source, length, dest, T());
+
+  if (opt_swap) {
+    typename T::value_type unit = dest;
+
+    while (unit != tail) {
+      *unit = endian_swap(*unit);
+      ++unit;
+    }
+  }
+
+  const size_t units_written = static_cast<size_t>(tail - dest);
+
+  return units_written * sizeof(*dest);
+}
+
+#ifdef PUGIXML_WCHAR_MODE
+// Return how many of the first length units of data can be converted without splitting
+// a UTF-16 surrogate pair: the last unit is dropped when wchar_t is 2 bytes wide and that
+// unit is a lead surrogate.
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+  if (length < 1) return 0;
+
+  // surrogates only matter when wchar_t holds 16-bit units
+  if (sizeof(wchar_t) != 2) return length;
+
+  // lead surrogates occupy 0xD800..0xDBFF; values below 0xD800 wrap to a large unsigned number
+  const unsigned int lead_index = static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800);
+
+  // discard last character if it's the lead of a surrogate pair
+  if (lead_index < 0x400) return length - 1;
+
+  return length;
+}
+
+// Convert length wchar units from data to the requested encoding, writing into whichever
+// scratch view matches the target unit size (r_char / r_u8 / r_u16 / r_u32).
+// Returns the number of bytes produced; asserts and returns 0 for an unsupported encoding.
+PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+  // only endian-swapping is required
+  if (need_endian_swap_utf(encoding, get_wchar_encoding())) {
+    convert_wchar_endian_swap(r_char, data, length);
+
+    return length * sizeof(char_t);
+  }
+
+  switch (encoding) {
+  case encoding_utf8:
+    // convert to utf8
+    return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
+
+  case encoding_utf16_be:
+  case encoding_utf16_le: {
+    // convert to utf16, swapping bytes when the target endianness is not the native one
+    const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_utf16 != encoding);
+  }
+
+  case encoding_utf32_be:
+  case encoding_utf32_le: {
+    // convert to utf32, swapping bytes when the target endianness is not the native one
+    const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_utf32 != encoding);
+  }
+
+  case encoding_latin1:
+    // convert to latin1
+    return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
+
+  default:
+    break;
+  }
+
+  assert(!"Invalid encoding");
+  return 0;
+}
+#else
+// Return how many of the first length bytes of UTF-8 data can be converted without
+// cutting a multi-byte sequence: the result ends just before the last byte (among the
+// final four) that is not a continuation byte. Inputs shorter than 5 bytes yield 0.
+PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+{
+  if (length < 5) return 0;
+
+  // walk backwards over at most four trailing bytes
+  size_t pos = length;
+
+  for (int remaining = 4; remaining > 0; --remaining) {
+    --pos;
+
+    const uint8_t byte = static_cast<uint8_t>(data[pos]);
+    const bool continuation = (byte & 0xc0) == 0x80;
+
+    // either a standalone character or a leading one
+    if (!continuation) return pos;
+  }
+
+  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
+  return length;
+}
+
+// Convert length UTF-8 bytes from data to the requested encoding, writing into whichever
+// scratch view matches the target unit size (r_u8 / r_u16 / r_u32).
+// Returns the number of bytes produced; asserts and returns 0 for an unsupported encoding.
+PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+{
+  switch (encoding) {
+  case encoding_utf16_be:
+  case encoding_utf16_le: {
+    // swap bytes when the target endianness is not the native one
+    const xml_encoding native_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+    return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_utf16 != encoding);
+  }
+
+  case encoding_utf32_be:
+  case encoding_utf32_le: {
+    // swap bytes when the target endianness is not the native one
+    const xml_encoding native_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+    return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_utf32 != encoding);
+  }
+
+  case encoding_latin1:
+    return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
+
+  default:
+    break;
+  }
+
+  assert(!"Invalid encoding");
+  return 0;
+}
+#endif
+
+// Buffers char_t output in a fixed-size member array and forwards it to an xml_writer,
+// converting to the target encoding through a scratch area when that encoding differs
+// from the native one. Copying is disabled (copy constructor / assignment are declared
+// private and left undefined).
+class xml_buffered_writer
+{
+  xml_buffered_writer(const xml_buffered_writer&);
+  xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+public:
+  // writer_ receives the final bytes; user_encoding is resolved via get_write_encoding
+  xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) {
+    PUGI__STATIC_ASSERT(bufcapacity >= 8);
+  }
+
+  // Write out everything currently buffered and empty the buffer.
+  // Always returns 0, which the write() overloads use as their new buffer offset.
+  size_t flush() {
+    flush(buffer, bufsize);
+    bufsize = 0;
+    return 0;
+  }
+
+  // Write size characters starting at data to the underlying writer, converting them
+  // through the scratch area first when the target encoding is not the native one.
+  void flush(const char_t* data, size_t size) {
+    if (size == 0) return;
+
+    // fast path, just write data
+    if (encoding == get_write_native_encoding())
+      writer.write(data, size * sizeof(char_t));
+    else {
+      // convert chunk
+      size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+      assert(result <= sizeof(scratch));
+
+      // write data
+      writer.write(scratch.data_u8, result);
+    }
+  }
+
+  // Flush the buffer, then emit data: chunks larger than the buffer go straight to the
+  // writer (converted piecewise if needed); a remainder that fits is copied into the buffer.
+  void write_direct(const char_t* data, size_t length) {
+    // flush the remaining buffer contents
+    flush();
+
+    // handle large chunks
+    if (length > bufcapacity) {
+      if (encoding == get_write_native_encoding()) {
+        // fast path, can just write data chunk
+        writer.write(data, length * sizeof(char_t));
+        return;
+      }
+
+      // need to convert in suitable chunks
+      while (length > bufcapacity) {
+        // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+        // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+        size_t chunk_size = get_valid_length(data, bufcapacity);
+        assert(chunk_size);
+
+        // convert chunk and write
+        flush(data, chunk_size);
+
+        // iterate
+        data += chunk_size;
+        length -= chunk_size;
+      }
+
+      // small tail is copied below
+      bufsize = 0;
+    }
+
+    // at this point length <= bufcapacity and the buffer is empty, so the copy fits
+    memcpy(buffer + bufsize, data, length * sizeof(char_t));
+    bufsize += length;
+  }
+
+  // Append length characters: copied into the buffer when they fit in the remaining
+  // space, otherwise handed to write_direct.
+  void write_buffer(const char_t* data, size_t length) {
+    size_t offset = bufsize;
+
+    if (offset + length <= bufcapacity) {
+      memcpy(buffer + offset, data, length * sizeof(char_t));
+      bufsize = offset + length;
+    } else {
+      write_direct(data, length);
+    }
+  }
+
+  // Append a zero-terminated string without computing its length up front.
+  void write_string(const char_t* data) {
+    // write the part of the string that fits in the buffer
+    size_t offset = bufsize;
+
+    while (*data && offset < bufcapacity)
+      buffer[offset++] = *data++;
+
+    // write the rest
+    if (offset < bufcapacity) {
+      bufsize = offset;
+    } else {
+      // backtrack a bit if we have split the codepoint
+      // (length = characters copied above; data - length is where that copy started)
+      size_t length = offset - bufsize;
+      size_t extra = length - get_valid_length(data - length, length);
+
+      // keep only the complete part in the buffer; the rest is re-sent via write_direct
+      bufsize = offset - extra;
+
+      write_direct(data - extra, strlength(data) + extra);
+    }
+  }
+
+  // The write() overloads below append 1 to 6 characters, flushing first when the
+  // characters would not fit; after a flush the offset restarts at 0.
+  void write(char_t d0) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 1) offset = flush();
+
+    buffer[offset + 0] = d0;
+    bufsize = offset + 1;
+  }
+
+  void write(char_t d0, char_t d1) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 2) offset = flush();
+
+    buffer[offset + 0] = d0;
+    buffer[offset + 1] = d1;
+    bufsize = offset + 2;
+  }
+
+  void write(char_t d0, char_t d1, char_t d2) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 3) offset = flush();
+
+    buffer[offset + 0] = d0;
+    buffer[offset + 1] = d1;
+    buffer[offset + 2] = d2;
+    bufsize = offset + 3;
+  }
+
+  void write(char_t d0, char_t d1, char_t d2, char_t d3) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 4) offset = flush();
+
+    buffer[offset + 0] = d0;
+    buffer[offset + 1] = d1;
+    buffer[offset + 2] = d2;
+    buffer[offset + 3] = d3;
+    bufsize = offset + 4;
+  }
+
+  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 5) offset = flush();
+
+    buffer[offset + 0] = d0;
+    buffer[offset + 1] = d1;
+    buffer[offset + 2] = d2;
+    buffer[offset + 3] = d3;
+    buffer[offset + 4] = d4;
+    bufsize = offset + 5;
+  }
+
+  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) {
+    size_t offset = bufsize;
+    if (offset > bufcapacity - 6) offset = flush();
+
+    buffer[offset + 0] = d0;
+    buffer[offset + 1] = d1;
+    buffer[offset + 2] = d2;
+    buffer[offset + 3] = d3;
+    buffer[offset + 4] = d4;
+    buffer[offset + 5] = d5;
+    bufsize = offset + 6;
+  }
+
+  // utf8 maximum expansion: x4 (-> utf32)
+  // utf16 maximum expansion: x2 (-> utf32)
+  // utf32 maximum expansion: x1
+  // bufcapacitybytes is the byte budget (overridable via PUGIXML_MEMORY_OUTPUT_STACK);
+  // each buffered character costs sizeof(char_t) bytes in buffer plus up to 4 bytes in
+  // scratch, hence the divisor used for bufcapacity.
+  enum {
+    bufcapacitybytes =
+#ifdef PUGIXML_MEMORY_OUTPUT_STACK
+    PUGIXML_MEMORY_OUTPUT_STACK
+#else
+    10240
+#endif
+    ,
+    bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+  };
+
+  // pending output in native char_t form
+  char_t buffer[bufcapacity];
+
+  // conversion target; the views alias the same storage, one per output unit size
+  union {
+    uint8_t data_u8[4 * bufcapacity];
+    uint16_t data_u16[2 * bufcapacity];
+    uint32_t data_u32[bufcapacity];
+    char_t data_char[bufcapacity];
+  } scratch;
+
+  // destination of the final bytes
+  xml_writer& writer;
+  // number of characters currently held in buffer
+  size_t bufsize;
+  // resolved output encoding (see constructor)
+  xml_encoding encoding;
+};
+
+// Write the zero-terminated string s, replacing every character flagged by type with an
+// XML escape: & < > " get named entities, anything else (asserted to be a control
+// character below 32) gets a two-digit decimal character reference.
+PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+{
+  while (*s) {
+    const char_t* run_start = s;
+
+    // advance s over the run of characters that need no escaping
+    PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
+
+    writer.write_buffer(run_start, static_cast<size_t>(s - run_start));
+
+    const char_t special = *s;
+
+    // reached the terminator: nothing left to escape
+    if (special == 0) break;
+
+    ++s;
+
+    if (special == '&') {
+      writer.write('&', 'a', 'm', 'p', ';');
+    } else if (special == '<') {
+      writer.write('&', 'l', 't', ';');
+    } else if (special == '>') {
+      writer.write('&', 'g', 't', ';');
+    } else if (special == '"') {
+      writer.write('&', 'q', 'u', 'o', 't', ';');
+    } else {
+      // s is not a usual symbol
+      unsigned int ch = static_cast<unsigned int>(special);
+      assert(ch < 32);
+
+      const char_t tens = static_cast<char_t>((ch / 10) + '0');
+      const char_t ones = static_cast<char_t>((ch % 10) + '0');
+
+      writer.write('&', '#', tens, ones, ';');
+    }
+  }
+}
+
+// Write s either verbatim (when format_no_escapes is set in flags) or with the characters
+// selected by type escaped.
+PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+{
+  const bool verbatim = (flags & format_no_escapes) != 0;
+
+  if (verbatim) {
+    writer.write_string(s);
+    return;
+  }
+
+  text_output_escaped(writer, s, type);
+}
+
+// Write s as one or more CDATA sections. Because "]]>" would terminate a section, the
+// text is split there: "]]" closes the current section and ">" starts the next one.
+// An empty string still produces one (empty) section.
+PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+{
+  do {
+    writer.write('<', '!', '[', 'C', 'D');
+    writer.write('A', 'T', 'A', '[');
+
+    const char_t* section = s;
+
+    // look for ]]> sequence - we can't output it as is since it terminates CDATA
+    for (; *s; ++s)
+      if (s[0] == ']' && s[1] == ']' && s[2] == '>')
+        break;
+
+    // skip ]] if we stopped at ]]>, > will go to the next CDATA section
+    if (*s) s += 2;
+
+    writer.write_buffer(section, static_cast<size_t>(s - section));
+
+    writer.write(']', ']', '>');
+  } while (*s);
+}
+
+// Write the indent string depth times. Indents of 1 to 4 characters go through the
+// fixed-arity write() overloads; any other length uses write_buffer.
+PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
+{
+  for (unsigned int level = 0; level < depth; ++level) {
+    switch (indent_length) {
+    case 1:
+      writer.write(indent[0]);
+      break;
+
+    case 2:
+      writer.write(indent[0], indent[1]);
+      break;
+
+    case 3:
+      writer.write(indent[0], indent[1], indent[2]);
+      break;
+
+    case 4:
+      writer.write(indent[0], indent[1], indent[2], indent[3]);
+      break;
+
+    default:
+      writer.write_buffer(indent, indent_length);
+    }
+  }
+}
+
+// Write s as an XML comment. A '-' that is followed by another '-' or by the end of the
+// text is written as "- " so the body never contains "--" and never ends in '-'.
+PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
+{
+  writer.write('<', '!', '-', '-');
+
+  while (*s) {
+    const char_t* chunk = s;
+
+    // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
+    for (; *s; ++s) {
+      const bool bad_dash = s[0] == '-' && (s[1] == '-' || s[1] == 0);
+
+      if (bad_dash) break;
+    }
+
+    writer.write_buffer(chunk, static_cast<size_t>(s - chunk));
+
+    // reached the end of the text: nothing to patch up
+    if (!*s) break;
+
+    assert(*s == '-');
+
+    writer.write('-', ' ');
+    ++s;
+  }
+
+  writer.write('-', '-', '>');
+}
+
+// Write the value of a processing instruction. Every "?>" inside the value is written
+// as "? >" so that it cannot terminate the instruction early.
+PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
+{
+  while (*s) {
+    const char_t* chunk = s;
+
+    // look for ?> sequence - we can't output it since ?> terminates PI
+    for (; *s; ++s)
+      if (s[0] == '?' && s[1] == '>')
+        break;
+
+    writer.write_buffer(chunk, static_cast<size_t>(s - chunk));
+
+    // reached the end of the value: nothing to patch up
+    if (!*s) break;
+
+    assert(s[0] == '?' && s[1] == '>');
+
+    writer.write('?', ' ', '>');
+    s += 2;
+  }
+}
+
+// Write all attributes of node as name="value" pairs. Each attribute is preceded by a
+// space, or by a newline plus indentation one level deeper than the node when
+// format_indent_attributes is set and format_raw is not. Attributes without a name are
+// written as ":anonymous"; a missing value is written as an empty string.
+PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
+{
+  const char_t* default_name = PUGIXML_TEXT(":anonymous");
+
+  // the separator style depends only on flags, so decide it once
+  const bool one_per_line = (flags & (format_indent_attributes | format_raw)) == format_indent_attributes;
+
+  xml_attribute_struct* attr = node->first_attribute;
+
+  while (attr) {
+    if (one_per_line) {
+      writer.write('\n');
+
+      text_output_indent(writer, indent, indent_length, depth + 1);
+    } else {
+      writer.write(' ');
+    }
+
+    writer.write_string(attr->name ? attr->name + 0 : default_name);
+    writer.write('=', '"');
+
+    if (attr->value)
+      text_output(writer, attr->value, ctx_special_attr, flags);
+
+    writer.write('"');
+
+    attr = attr->next_attribute;
+  }
+}
+
+// Write the start tag of an element, including its attributes.
+// Returns true when the element has children (tag left open with '>'), false when it was
+// written as a self-closing tag (" />").
+PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
+{
+  const char_t* fallback = PUGIXML_TEXT(":anonymous");
+  const char_t* tag = node->name ? node->name + 0 : fallback;
+
+  writer.write('<');
+  writer.write_string(tag);
+
+  if (node->first_attribute)
+    node_output_attributes(writer, node, indent, indent_length, flags, depth);
+
+  if (node->first_child) {
+    // children follow, so the caller has to write the matching end tag later
+    writer.write('>');
+
+    return true;
+  }
+
+  writer.write(' ', '/', '>');
+
+  return false;
+}
+
+// Write the end tag "</name>" of an element; a node without a name is written as
+// ":anonymous".
+PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
+{
+  const char_t* fallback = PUGIXML_TEXT(":anonymous");
+  const char_t* tag = fallback;
+
+  if (node->name)
+    tag = node->name + 0;
+
+  writer.write('<', '/');
+  writer.write_string(tag);
+  writer.write('>');
+}
+
+// Write a node that has no children of its own: PCDATA, CDATA, comment, processing
+// instruction, declaration or doctype. Any other node type triggers an assertion.
+PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+{
+  const char_t* anonymous = PUGIXML_TEXT(":anonymous");
+
+  const xml_node_type kind = PUGI__NODETYPE(node);
+
+  if (kind == node_pcdata) {
+    text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
+  } else if (kind == node_cdata) {
+    text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
+  } else if (kind == node_comment) {
+    node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
+  } else if (kind == node_pi) {
+    writer.write('<', '?');
+    writer.write_string(node->name ? node->name + 0 : anonymous);
+
+    // the value is optional; when present it is separated from the target by one space
+    if (node->value) {
+      writer.write(' ');
+      node_output_pi_value(writer, node->value);
+    }
+
+    writer.write('?', '>');
+  } else if (kind == node_declaration) {
+    writer.write('<', '?');
+    writer.write_string(node->name ? node->name + 0 : anonymous);
+    // format_raw keeps the declaration's attributes on a single line
+    node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
+    writer.write('?', '>');
+  } else if (kind == node_doctype) {
+    writer.write('<', '!', 'D', 'O', 'C');
+    writer.write('T', 'Y', 'P', 'E');
+
+    if (node->value) {
+      writer.write(' ');
+      writer.write_string(node->value);
+    }
+
+    writer.write('>');
+  } else {
+    assert(!"Invalid node type");
+  }
+}
+
+// Bit flags used by node_output to remember which whitespace has to be written before
+// the next node or closing tag.
+enum indent_flags_t {
+  // write a line break first (suppressed when format_raw is set)
+  indent_newline = 1,
+  // write the indentation for the current depth (only when an indent string is in use)
+  indent_indent = 2
+};
+
+// Serialize the subtree rooted at root to writer.
+// The tree is walked iteratively (first_child / next_sibling / parent links, no recursion).
+// indent is the indentation string, used only when format_indent or
+// format_indent_attributes is set and format_raw is not; depth is the nesting level the
+// root is written at. indent_flags carries the pending newline / indentation from one
+// node to the next.
+PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
+{
+  size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
+  unsigned int indent_flags = indent_indent;
+
+  xml_node_struct* node = root;
+
+  do {
+    assert(node);
+
+    // begin writing current node
+    if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) {
+      node_output_simple(writer, node, flags);
+
+      // no newline or indentation is written after text content
+      indent_flags = 0;
+    } else {
+      if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+        writer.write('\n');
+
+      if ((indent_flags & indent_indent) && indent_length)
+        text_output_indent(writer, indent, indent_length, depth);
+
+      if (PUGI__NODETYPE(node) == node_element) {
+        indent_flags = indent_newline | indent_indent;
+
+        // node_output_start returns true when the element has children: descend into them
+        if (node_output_start(writer, node, indent, indent_length, flags, depth)) {
+          node = node->first_child;
+          depth++;
+          continue;
+        }
+      } else if (PUGI__NODETYPE(node) == node_document) {
+        indent_flags = indent_indent;
+
+        // the document node itself produces no output and does not increase depth
+        if (node->first_child) {
+          node = node->first_child;
+          continue;
+        }
+      } else {
+        node_output_simple(writer, node, flags);
+
+        indent_flags = indent_newline | indent_indent;
+      }
+    }
+
+    // continue to the next node
+    // (move to the next sibling if there is one; otherwise climb to the parent, closing
+    // each element that is left, until a sibling is found or root is reached)
+    while (node != root) {
+      if (node->next_sibling) {
+        node = node->next_sibling;
+        break;
+      }
+
+      node = node->parent;
+
+      // write closing node
+      if (PUGI__NODETYPE(node) == node_element) {
+        depth--;
+
+        if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+          writer.write('\n');
+
+        if ((indent_flags & indent_indent) && indent_length)
+          text_output_indent(writer, indent, indent_length, depth);
+
+        node_output_end(writer, node);
+
+        indent_flags = indent_newline | indent_indent;
+      }
+    }
+  } while (node != root);
+
+  // terminate the output with a newline if one is still pending
+  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
+    writer.write('\n');
+}
+
+// Return true if a declaration node appears among the children of node before the first
+// element child; the scan stops at the first declaration or element it meets.
+PUGI__FN bool has_declaration(xml_node_struct* node)
+{
+  xml_node_struct* child = node->first_child;
+
+  while (child) {
+    switch (PUGI__NODETYPE(child)) {
+    case node_declaration:
+      return true;
+
+    case node_element:
+      // an element was reached first: stop looking
+      return false;
+
+    default:
+      break;
+    }
+
+    child = child->next_sibling;
+  }
+
+  return false;
+}
+
+// Return true if attr is one of the attributes in node's attribute list
+// (compared by pointer identity).
+PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
+{
+  xml_attribute_struct* candidate = node->first_attribute;
+
+  while (candidate) {
+    if (candidate == attr) return true;
+
+    candidate = candidate->next_attribute;
+  }
+
+  return false;
+}
+
+// Return true if a node of type parent may carry attributes: only elements and
+// declarations can.
+PUGI__FN bool allow_insert_attribute(xml_node_type parent)
+{
+  switch (parent) {
+  case node_element:
+  case node_declaration:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+// Return true if a node of type child may be inserted under a node of type parent:
+// - only documents and elements can have children;
+// - document and null nodes can never be children;
+// - declarations and doctypes are allowed directly under the document only.
+PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
+{
+  const bool parent_is_document = parent == node_document;
+  const bool parent_can_hold = parent_is_document || parent == node_element;
+  const bool child_is_insertable = child != node_document && child != node_null;
+  const bool child_is_top_level_only = child == node_declaration || child == node_doctype;
+
+  return parent_can_hold && child_is_insertable && (parent_is_document || !child_is_top_level_only);
+}
+
+// Return true if child may be moved under parent: the node types must be compatible,
+// both nodes must belong to the same tree, and parent must not be child itself or
+// lie inside child's subtree.
+PUGI__FN bool allow_move(xml_node parent, xml_node child)
+{
+  // check that child can be a child of parent
+  if (!allow_insert_child(parent.type(), child.type())) return false;
+
+  // check that node is not moved between documents
+  if (parent.root() != child.root()) return false;
+
+  // check that new parent is not in the child subtree: walk from parent up to the root
+  for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent())
+    if (ancestor == child)
+      return false;
+
+  return true;
+}
+
+// Copy the string source into the (empty) destination dest.
+// When alloc is non-null and the source string is not marked as allocated (header_mask
+// bit clear in source_header), the pointer is shared instead of copied and both headers
+// are flagged as sharing contents; otherwise the text is duplicated via strcpy_insitu.
+// A null source leaves dest untouched.
+template <typename String, typename Header>
+PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
+{
+  assert(!dest && (header & header_mask) == 0);
+
+  if (!source) return;
+
+  const bool can_share = alloc && (source_header & header_mask) == 0;
+
+  if (!can_share) {
+    strcpy_insitu(dest, header, header_mask, source, strlength(source));
+    return;
+  }
+
+  dest = source;
+
+  // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
+  header |= xml_memory_page_contents_shared_mask;
+  source_header |= xml_memory_page_contents_shared_mask;
+}
+
+// Copy the name, value and all attributes of sn into dn (children are not copied).
+// shared_alloc is forwarded to node_copy_string to decide whether strings may be shared.
+// Attributes that fail to allocate are silently skipped.
+PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
+{
+  node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
+  node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
+
+  xml_attribute_struct* src_attr = sn->first_attribute;
+
+  while (src_attr) {
+    xml_attribute_struct* dst_attr = append_new_attribute(dn, get_allocator(dn));
+
+    if (dst_attr) {
+      node_copy_string(dst_attr->name, dst_attr->header, xml_memory_page_name_allocated_mask, src_attr->name, src_attr->header, shared_alloc);
+      node_copy_string(dst_attr->value, dst_attr->header, xml_memory_page_value_allocated_mask, src_attr->value, src_attr->header, shared_alloc);
+    }
+
+    src_attr = src_attr->next_attribute;
+  }
+}
+
+// Deep-copy sn into dn: the contents of sn itself, then every descendant of sn.
+// The source subtree is walked iteratively with two cursors: sit is the current source
+// node and dit is the destination node that copies of sit's siblings are appended to.
+PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
+{
+  xml_allocator& alloc = get_allocator(dn);
+  // strings may only be shared when source and destination use the same allocator
+  xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
+
+  node_copy_contents(dn, sn, shared_alloc);
+
+  xml_node_struct* dit = dn;
+  xml_node_struct* sit = sn->first_child;
+
+  while (sit && sit != sn) {
+    // when the walk meets dn itself inside the source subtree, it is neither copied nor
+    // descended into
+    if (sit != dn) {
+      xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
+
+      // a failed allocation skips this node together with its subtree
+      if (copy) {
+        node_copy_contents(copy, sit, shared_alloc);
+
+        if (sit->first_child) {
+          dit = copy;
+          sit = sit->first_child;
+          continue;
+        }
+      }
+    }
+
+    // continue to the next node
+    // (next sibling if any; otherwise climb both cursors until a sibling exists or sn is reached)
+    do {
+      if (sit->next_sibling) {
+        sit = sit->next_sibling;
+        break;
+      }
+
+      sit = sit->parent;
+      dit = dit->parent;
+    } while (sit != sn);
+  }
+}
+
+// Copies name and value of attribute sa into attribute da.
+PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
+{
+  xml_allocator& dest_alloc = get_allocator(da);
+
+  // the allocator is only handed to node_copy_string when both attributes use the same one
+  const bool same_allocator = &dest_alloc == &get_allocator(sa);
+  xml_allocator* shared_alloc = same_allocator ? &dest_alloc : 0;
+
+  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
+  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
+}
+
+// True for PCDATA and CDATA nodes.
+inline bool is_text_node(xml_node_struct* node)
+{
+  switch (PUGI__NODETYPE(node)) {
+  case node_pcdata:
+  case node_cdata:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+// get value with conversion functions
+
+// Parses an integer from the start of value using the unsigned type U for accumulation.
+// Accepted syntax: leading ct_space characters, an optional '+' or '-', then either a
+// "0x"/"0X"-prefixed hexadecimal number or a decimal number. Parsing stops at the first
+// character that is not a digit of the selected base.
+//   minneg - magnitude of the smallest allowed negative value
+//   maxpos - largest allowed positive value
+// Out-of-range input saturates: 0 - minneg for negative input, maxpos otherwise.
+template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
+{
+  U result = 0;
+  const char_t* s = value;
+
+  while (PUGI__IS_CHARTYPE(*s, ct_space))
+    s++;
+
+  bool negative = (*s == '-');
+
+  // skip a single sign character, if any
+  s += (*s == '+' || *s == '-');
+
+  bool overflow = false;
+
+  // (c | ' ') folds an ASCII upper-case letter to lower case, so this matches both "0x" and "0X"
+  if (s[0] == '0' && (s[1] | ' ') == 'x') {
+    s += 2;
+
+    // since overflow detection relies on length of the sequence skip leading zeros
+    while (*s == '0')
+      s++;
+
+    const char_t* start = s;
+
+    for (;;) {
+      if (static_cast<unsigned>(*s - '0') < 10)
+        result = result * 16 + (*s - '0');
+      else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
+        result = result * 16 + ((*s | ' ') - 'a' + 10);
+      else
+        break;
+
+      s++;
+    }
+
+    size_t digits = static_cast<size_t>(s - start);
+
+    // every hex digit is 4 bits, so more than two digits per byte of U can not fit
+    overflow = digits > sizeof(U) * 2;
+  } else {
+    // since overflow detection relies on length of the sequence skip leading zeros
+    while (*s == '0')
+      s++;
+
+    const char_t* start = s;
+
+    for (;;) {
+      if (static_cast<unsigned>(*s - '0') < 10)
+        result = result * 10 + (*s - '0');
+      else
+        break;
+
+      s++;
+    }
+
+    size_t digits = static_cast<size_t>(s - start);
+
+    PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
+
+    // digit count and leading digit of the largest value of U (18446744073709551615 / 4294967295 / 65535)
+    const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
+    const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
+    const size_t high_bit = sizeof(U) * 8 - 1;
+
+    // fewer digits than max_digits10 always fit and more never do; with exactly max_digits10 digits
+    // the value fits if the leading digit is below max_lead, or equals it and the accumulated
+    // result still has its top bit set (the top-bit test is how a wrapped result is told apart here)
+    overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
+  }
+
+  // negation is done in unsigned arithmetic; callers convert the result to the signed type
+  if (negative)
+    return (overflow || result > minneg) ? 0 - minneg : 0 - result;
+  else
+    return (overflow || result > maxpos) ? maxpos : result;
+}
+
+// Parses value as an int; out-of-range input saturates at the limits handed to string_to_integer.
+PUGI__FN int get_value_int(const char_t* value)
+{
+  // magnitude of INT_MIN, computed in unsigned arithmetic
+  const unsigned int min_magnitude = 0 - static_cast<unsigned int>(INT_MIN);
+  const unsigned int max_value = INT_MAX;
+
+  return string_to_integer<unsigned int>(value, min_magnitude, max_value);
+}
+
+// Parses value as an unsigned int; with a negative magnitude limit of 0 any negative input yields 0.
+PUGI__FN unsigned int get_value_uint(const char_t* value)
+{
+  const unsigned int min_magnitude = 0;
+  const unsigned int max_value = UINT_MAX;
+
+  return string_to_integer<unsigned int>(value, min_magnitude, max_value);
+}
+
+// Converts the leading part of value to double with wcstod (wide mode) or strtod.
+PUGI__FN double get_value_double(const char_t* value)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  const double parsed = wcstod(value, 0);
+#else
+  const double parsed = strtod(value, 0);
+#endif
+
+  return parsed;
+}
+
+// Same conversion as get_value_double, narrowed to float afterwards.
+PUGI__FN float get_value_float(const char_t* value)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  const double parsed = wcstod(value, 0);
+#else
+  const double parsed = strtod(value, 0);
+#endif
+
+  return static_cast<float>(parsed);
+}
+
+// Interprets value as a boolean by looking at its first character only.
+PUGI__FN bool get_value_bool(const char_t* value)
+{
+  switch (*value) {
+  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
+  case '1':
+  case 't':
+  case 'T':
+  case 'y':
+  case 'Y':
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Parses value as a long long; out-of-range input saturates at the limits handed to string_to_integer.
+PUGI__FN long long get_value_llong(const char_t* value)
+{
+  // magnitude of LLONG_MIN, computed in unsigned arithmetic
+  const unsigned long long min_magnitude = 0 - static_cast<unsigned long long>(LLONG_MIN);
+  const unsigned long long max_value = LLONG_MAX;
+
+  return string_to_integer<unsigned long long>(value, min_magnitude, max_value);
+}
+
+// Parses value as an unsigned long long; with a negative magnitude limit of 0 any negative input yields 0.
+PUGI__FN unsigned long long get_value_ullong(const char_t* value)
+{
+  const unsigned long long min_magnitude = 0;
+  const unsigned long long max_value = ULLONG_MAX;
+
+  return string_to_integer<unsigned long long>(value, min_magnitude, max_value);
+}
+#endif
+
+// Writes the decimal representation of value into the tail of the buffer [begin, end).
+// Digits are produced from the least significant one, moving backwards from end - 1.
+// Returns a pointer to the first character of the text; the text extends up to end and no
+// terminator is written, so callers derive the length as end - <returned pointer>.
+//   value    - the number, already converted to the unsigned type U
+//   negative - true if value holds a negative number that was converted to U
+template <typename U>
+PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
+{
+  char_t* result = end - 1;
+  // take the magnitude in unsigned arithmetic
+  U rest = negative ? 0 - value : value;
+
+  // do/while so that zero still produces one digit
+  do {
+    *result-- = static_cast<char_t>('0' + (rest % 10));
+    rest /= 10;
+  } while (rest);
+
+  // result now points one position before the first digit
+  assert(result >= begin);
+  (void)begin;
+
+  // the sign is written unconditionally; for non-negative numbers it is skipped by the return below
+  *result = '-';
+
+  return result + !negative;
+}
+
+// set value with conversion functions
+
+// Stores the zero-terminated narrow string buf into dest via strcpy_insitu.
+// In wide character mode every char is widened by plain assignment first.
+template <typename String, typename Header>
+PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  char_t wide[128];
+  assert(strlen(buf) < sizeof(wide) / sizeof(wide[0]));
+
+  size_t length = 0;
+
+  while (buf[length]) {
+    wide[length] = buf[length];
+    ++length;
+  }
+
+  return strcpy_insitu(dest, header, header_mask, wide, length);
+#else
+  const size_t length = strlen(buf);
+
+  return strcpy_insitu(dest, header, header_mask, buf, length);
+#endif
+}
+
+// Stores the decimal text of a signed int into dest.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value)
+{
+  char_t digits[64];
+  char_t* const digits_end = digits + sizeof(digits) / sizeof(digits[0]);
+
+  // the text is built at the tail of the buffer; first points at its first character
+  const bool is_negative = value < 0;
+  char_t* const first = integer_to_string<unsigned int>(digits, digits_end, value, is_negative);
+
+  return strcpy_insitu(dest, header, header_mask, first, digits_end - first);
+}
+
+// Stores the decimal text of an unsigned int into dest.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value)
+{
+  char_t digits[64];
+  char_t* const digits_end = digits + sizeof(digits) / sizeof(digits[0]);
+
+  char_t* const first = integer_to_string<unsigned int>(digits, digits_end, value, false);
+
+  return strcpy_insitu(dest, header, header_mask, first, digits_end - first);
+}
+
+// Stores a float into dest, formatted with 9 significant digits ("%.9g").
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
+{
+  char text[128];
+
+  sprintf(text, "%.9g", value);
+
+  return set_value_ascii(dest, header, header_mask, text);
+}
+
+// Stores a double into dest, formatted with 17 significant digits ("%.17g").
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
+{
+  char text[128];
+
+  sprintf(text, "%.17g", value);
+
+  return set_value_ascii(dest, header, header_mask, text);
+}
+
+// Stores "true" or "false" into dest.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
+{
+  if (value)
+    return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("true"), 4);
+
+  return strcpy_insitu(dest, header, header_mask, PUGIXML_TEXT("false"), 5);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Stores the decimal text of a long long into dest.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value)
+{
+  char_t digits[64];
+  char_t* const digits_end = digits + sizeof(digits) / sizeof(digits[0]);
+
+  // the text is built at the tail of the buffer; first points at its first character
+  const bool is_negative = value < 0;
+  char_t* const first = integer_to_string<unsigned long long>(digits, digits_end, value, is_negative);
+
+  return strcpy_insitu(dest, header, header_mask, first, digits_end - first);
+}
+
+// Stores the decimal text of an unsigned long long into dest.
+template <typename String, typename Header>
+PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value)
+{
+  char_t digits[64];
+  char_t* const digits_end = digits + sizeof(digits) / sizeof(digits[0]);
+
+  char_t* const first = integer_to_string<unsigned long long>(digits, digits_end, value, false);
+
+  return strcpy_insitu(dest, header, header_mask, first, digits_end - first);
+}
+#endif
+
+// Converts contents to the native character type if necessary and parses it into root.
+//   doc        - document whose buffer field receives the parsed buffer (used for offset_debug)
+//   root       - node handed to the parser as the parse root
+//   contents   - input bytes; may be null only if size is 0
+//   is_mutable - forwarded to convert_buffer
+//   own        - true when ownership of contents (allocated through xml_memory) is handed to this function
+//   out_buffer - receives the buffer that has to be released later, if there is one
+// Returns the parse result with its encoding field set to the detected buffer encoding.
+PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
+{
+  // check input buffer
+  if (!contents && size) return make_parse_result(status_io_error);
+
+  // get actual encoding
+  xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
+
+  // get private buffer
+  char_t* buffer = 0;
+  size_t length = 0;
+
+  if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) {
+    // conversion failed, so nothing refers to contents; if we own it, it has to be released here,
+    // because callers that pass own == true do not free the buffer themselves
+    if (own && contents) impl::xml_memory::deallocate(contents);
+
+    return impl::make_parse_result(status_out_of_memory);
+  }
+
+  // delete original buffer if we performed a conversion
+  if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
+
+  // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
+  if (own || buffer != contents) *out_buffer = buffer;
+
+  // store buffer for offset_debug
+  doc->buffer = buffer;
+
+  // parse
+  xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
+
+  // remember encoding
+  res.encoding = buffer_encoding;
+
+  return res;
+}
+
+// The whole file is loaded into memory, so its length is needed up front; it is obtained by seeking
+// to the end, asking for the position and seeking back to the start.
+// Returns status_ok and stores the length in out_result, or an error status.
+PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+  // 64-bit seek/tell provided by the MSVC runtime
+  typedef __int64 length_type;
+
+  _fseeki64(file, 0, SEEK_END);
+  const length_type file_length = _ftelli64(file);
+  _fseeki64(file, 0, SEEK_SET);
+#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
+  // 64-bit seek/tell provided by MinGW
+  typedef off64_t length_type;
+
+  fseeko64(file, 0, SEEK_END);
+  const length_type file_length = ftello64(file);
+  fseeko64(file, 0, SEEK_SET);
+#else
+  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+  typedef long length_type;
+
+  fseek(file, 0, SEEK_END);
+  const length_type file_length = ftell(file);
+  fseek(file, 0, SEEK_SET);
+#endif
+
+  // a negative position is reported as an I/O error
+  if (file_length < 0) return status_io_error;
+
+  // the length has to survive a round trip through size_t
+  const size_t narrowed = static_cast<size_t>(file_length);
+
+  if (static_cast<length_type>(narrowed) != file_length) return status_out_of_memory;
+
+  out_result = narrowed;
+
+  return status_ok;
+}
+
+// Appends a zero terminator to buffer when the encoding is one of those tested below and returns
+// the new size in bytes; otherwise returns size unchanged.
+// The caller must provide sizeof(char_t) writable bytes after the first size bytes of buffer.
+PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
+{
+  // We only need to zero-terminate if encoding conversion does not do it for us
+#ifdef PUGIXML_WCHAR_MODE
+  const xml_encoding native = get_wchar_encoding();
+  const bool terminate_here = encoding == native || need_endian_swap_utf(encoding, native);
+
+  if (!terminate_here) return size;
+
+  char_t* chars = static_cast<char_t*>(buffer);
+  const size_t count = size / sizeof(char_t);
+
+  chars[count] = 0;
+
+  return (count + 1) * sizeof(char_t);
+#else
+  if (encoding != encoding_utf8) return size;
+
+  static_cast<char*>(buffer)[size] = 0;
+
+  return size + 1;
+#endif
+}
+
+// Reads the whole file into a buffer allocated through xml_memory and parses it into doc.
+// The buffer is handed to load_buffer_impl with both is_mutable and own set to true.
+PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
+{
+  if (!file) return make_parse_result(status_file_not_found);
+
+  // determine the file size (this can fail with an I/O error)
+  size_t file_size = 0;
+  const xml_parse_status size_result = get_file_size(file, file_size);
+
+  if (size_result != status_ok) return make_parse_result(size_result);
+
+  // extra room for the terminator that zero_terminate_buffer may append
+  const size_t terminator_room = sizeof(char_t);
+
+  char* data = static_cast<char*>(xml_memory::allocate(file_size + terminator_room));
+
+  if (!data) return make_parse_result(status_out_of_memory);
+
+  // a short read is treated as an I/O error
+  if (fread(data, 1, file_size, file) != file_size) {
+    xml_memory::deallocate(data);
+    return make_parse_result(status_io_error);
+  }
+
+  const xml_encoding detected = get_buffer_encoding(encoding, data, file_size);
+  const size_t parse_size = zero_terminate_buffer(data, file_size, detected);
+
+  return load_buffer_impl(doc, doc, data, parse_size, options, detected, true, true, out_buffer);
+}
+
+#ifndef PUGIXML_NO_STL
+// One link of a singly linked list of fixed-size buffers, used to collect stream data.
+template <typename T> struct xml_stream_chunk {
+  // Allocates a chunk through xml_memory and constructs it in place; returns 0 if allocation fails.
+  static xml_stream_chunk* create() {
+    void* storage = xml_memory::allocate(sizeof(xml_stream_chunk));
+
+    return storage ? new (storage) xml_stream_chunk() : 0;
+  }
+
+  // Releases chunk and every chunk linked after it.
+  static void destroy(xml_stream_chunk* chunk) {
+    for (xml_stream_chunk* current = chunk; current;) {
+      // read the link before the memory is handed back
+      xml_stream_chunk* following = current->next;
+
+      xml_memory::deallocate(current);
+
+      current = following;
+    }
+  }
+
+  xml_stream_chunk(): next(0), size(0) {
+  }
+
+  // next chunk in the list, or 0
+  xml_stream_chunk* next;
+  // number of bytes of data that are in use
+  size_t size;
+
+  T data[xml_memory_page_size / sizeof(T)];
+};
+
+// Reads a stream to its end without seeking: data is collected in a list of chunks and then
+// copied into one contiguous buffer allocated through xml_memory.
+// On success *out_buffer / *out_size receive the buffer and its size in bytes.
+template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+  // holder for the chunk list; xml_stream_chunk<T>::destroy is registered as its deleter
+  auto_deleter<xml_stream_chunk<T> > head(0, xml_stream_chunk<T>::destroy);
+
+  size_t byte_count = 0;
+  xml_stream_chunk<T>* tail = 0;
+
+  while (!stream.eof()) {
+    xml_stream_chunk<T>* node = xml_stream_chunk<T>::create();
+    if (!node) return status_out_of_memory;
+
+    // link the new chunk at the end of the list
+    if (tail)
+      tail->next = node;
+    else
+      head.data = node;
+
+    tail = node;
+
+    // fill the chunk
+    const std::streamsize capacity = static_cast<std::streamsize>(sizeof(node->data) / sizeof(T));
+
+    stream.read(node->data, capacity);
+    node->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
+
+    // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
+    const bool read_failed = stream.bad() || (!stream.eof() && stream.fail());
+    if (read_failed) return status_io_error;
+
+    // guard against huge files (chunk size is small enough to make this overflow check work)
+    if (byte_count + node->size < byte_count) return status_out_of_memory;
+    byte_count += node->size;
+  }
+
+  // extra room for a terminator
+  const size_t terminator_room = sizeof(char_t);
+
+  char* result = static_cast<char*>(xml_memory::allocate(byte_count + terminator_room));
+  if (!result) return status_out_of_memory;
+
+  // concatenate the chunks
+  char* cursor = result;
+  xml_stream_chunk<T>* it = head.data;
+
+  while (it) {
+    assert(cursor + it->size <= result + byte_count);
+    memcpy(cursor, it->data, it->size);
+    cursor += it->size;
+
+    it = it->next;
+  }
+
+  assert(cursor == result + byte_count);
+
+  *out_buffer = result;
+  *out_size = byte_count;
+
+  return status_ok;
+}
+
+// Reads the rest of a seekable stream with a single read into a buffer allocated through xml_memory.
+// On success *out_buffer / *out_size receive the buffer and the number of bytes actually read.
+template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
+{
+  // measure the distance from the current position to the end of the stream, then seek back
+  typename std::basic_istream<T>::pos_type start = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streamoff remaining = stream.tellg() - start;
+  stream.seekg(start);
+
+  if (stream.fail() || start < 0) return status_io_error;
+
+  // guard against huge files
+  const size_t wanted = static_cast<size_t>(remaining);
+
+  if (static_cast<std::streamsize>(wanted) != remaining || remaining < 0) return status_out_of_memory;
+
+  // extra room for a terminator
+  const size_t terminator_room = sizeof(char_t);
+
+  // read stream data into memory (guard against stream exceptions with buffer holder)
+  auto_deleter<void> holder(xml_memory::allocate(wanted * sizeof(T) + terminator_room), xml_memory::deallocate);
+  if (!holder.data) return status_out_of_memory;
+
+  stream.read(static_cast<T*>(holder.data), static_cast<std::streamsize>(wanted));
+
+  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
+  const bool read_failed = stream.bad() || (!stream.eof() && stream.fail());
+  if (read_failed) return status_io_error;
+
+  const size_t received = static_cast<size_t>(stream.gcount());
+  assert(received <= wanted);
+
+  // hand the buffer over to the caller
+  *out_buffer = holder.release();
+  *out_size = received * sizeof(T);
+
+  return status_ok;
+}
+
+// Loads the remaining stream contents into memory and parses them into doc.
+template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
+{
+  // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
+  if (stream.fail()) return make_parse_result(status_io_error);
+
+  void* data = 0;
+  size_t data_size = 0;
+  xml_parse_status load_status = status_ok;
+
+  // the seek-based loader is preferred (it's faster and takes less memory); it is used whenever tellg works
+  const bool seekable = !(stream.tellg() < 0);
+
+  if (seekable)
+    load_status = load_stream_data_seek(stream, &data, &data_size);
+  else {
+    stream.clear(); // clear error flags that could be set by a failing tellg
+    load_status = load_stream_data_noseek(stream, &data, &data_size);
+  }
+
+  if (load_status != status_ok) return make_parse_result(load_status);
+
+  const xml_encoding detected = get_buffer_encoding(encoding, data, data_size);
+  const size_t parse_size = zero_terminate_buffer(data, data_size, detected);
+
+  return load_buffer_impl(doc, doc, data, parse_size, options, detected, true, true, out_buffer);
+}
+#endif
+
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
+// Opens a file by wide path using the runtime's _wfopen.
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+  FILE* handle = _wfopen(path, mode);
+
+  return handle;
+}
+#else
+// Converts a wide string to a zero-terminated utf8 string allocated through xml_memory.
+// Returns 0 if the allocation fails; the caller releases the result with xml_memory::deallocate.
+PUGI__FN char* convert_path_heap(const wchar_t* str)
+{
+  assert(str);
+
+  // first pass: compute the size of the utf8 representation
+  const size_t wide_length = strlength_wide(str);
+  const size_t utf8_size = as_utf8_begin(str, wide_length);
+
+  // one extra byte for the terminator
+  char* utf8 = static_cast<char*>(xml_memory::allocate(utf8_size + 1));
+
+  if (utf8) {
+    // second pass: perform the conversion, then terminate
+    as_utf8_end(utf8, utf8_size, str, wide_length);
+    utf8[utf8_size] = 0;
+  }
+
+  return utf8;
+}
+
+// Opens a file by wide path on platforms without a wide-path fopen.
+// The path is converted to utf8 and the mode to ASCII, then plain fopen is used.
+// Returns 0 if the mode does not fit the internal buffer, the path can not be converted,
+// or fopen fails.
+PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
+{
+  // convert mode to ASCII (we mirror _wfopen interface); the last element is reserved for the
+  // terminator, so an over-long mode is rejected instead of being written past the buffer
+  char mode_ascii[4] = {0};
+
+  for (size_t i = 0; mode[i]; ++i) {
+    if (i + 1 >= sizeof(mode_ascii)) return 0;
+
+    mode_ascii[i] = static_cast<char>(mode[i]);
+  }
+
+  // there is no standard function to open wide paths, so our best bet is to try utf8 path
+  char* path_utf8 = convert_path_heap(path);
+  if (!path_utf8) return 0;
+
+  // try to open the utf8 path
+  FILE* result = fopen(path_utf8, mode_ascii);
+
+  // free dummy buffer
+  xml_memory::deallocate(path_utf8);
+
+  return result;
+}
+#endif
+
+// Writes doc to an already opened file.
+// Returns false if file is null or if an error is detected on the stream after the data has
+// been written and flushed; the file is not closed here.
+PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
+{
+  if (!file) return false;
+
+  xml_writer_file writer(file);
+  doc.save(writer, indent, flags, encoding);
+
+  // xml_writer_file::write can not report failures, and output may still sit in the stdio buffer;
+  // flushing first lets a late write error (e.g. disk full) show up before the result is decided
+  return fflush(file) == 0 && ferror(file) == 0;
+}
+
+// Scope guard that clears a node's name on construction and puts it back on destruction.
+struct name_null_sentry {
+  xml_node_struct* node;
+  // the name that was taken from the node
+  char_t* name;
+
+  name_null_sentry(xml_node_struct* node_): node(node_), name(0) {
+    name = node->name;
+    node->name = 0;
+  }
+
+  ~name_null_sentry() {
+    node->name = name;
+  }
+};
+PUGI__NS_END
+
+namespace pugi
+{
+// Wraps a FILE* that is passed in as an untyped pointer.
+PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
+{
+}
+
+// Writes size bytes of data to the wrapped FILE*.
+PUGI__FN void xml_writer_file::write(const void* data, size_t size)
+{
+  FILE* target = static_cast<FILE*>(file);
+
+  const size_t written = fwrite(data, 1, size, target);
+  (void)!written; // unfortunately we can't do proper error handling here
+}
+
+#ifndef PUGIXML_NO_STL
+// Writer over a narrow output stream.
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
+{
+}
+
+// Writer over a wide output stream.
+PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
+{
+}
+
+// Forwards size bytes of data to whichever stream this writer was constructed with.
+PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
+{
+  if (!narrow_stream) {
+    assert(wide_stream);
+    assert(size % sizeof(wchar_t) == 0);
+
+    // the wide stream counts in characters, not bytes
+    const std::streamsize count = static_cast<std::streamsize>(size / sizeof(wchar_t));
+
+    wide_stream->write(reinterpret_cast<const wchar_t*>(data), count);
+    return;
+  }
+
+  assert(!wide_stream);
+  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
+}
+#endif
+
+// Constructs a walker with depth 0.
+PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
+{
+}
+
+PUGI__FN xml_tree_walker::~xml_tree_walker()
+{
+}
+
+// Returns the stored depth value.
+PUGI__FN int xml_tree_walker::depth() const
+{
+  return _depth;
+}
+
+// Default implementation: does nothing and returns true.
+PUGI__FN bool xml_tree_walker::begin(xml_node&)
+{
+  return true;
+}
+
+// Default implementation: does nothing and returns true.
+PUGI__FN bool xml_tree_walker::end(xml_node&)
+{
+  return true;
+}
+
+// Constructs an empty (null) attribute handle.
+PUGI__FN xml_attribute::xml_attribute(): _attr(0)
+{
+}
+
+// Constructs a handle for the given attribute structure.
+PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
+{
+}
+
+// Dummy function whose address serves as the "true" value of the safe-bool conversion below.
+PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
+{
+}
+
+// Safe-bool conversion: non-null for a non-empty handle.
+PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
+{
+  if (!_attr) return 0;
+
+  return unspecified_bool_xml_attribute;
+}
+
+// True for an empty handle.
+PUGI__FN bool xml_attribute::operator!() const
+{
+  return _attr == 0;
+}
+
+// All comparison operators compare the underlying attribute pointers of the two handles.
+PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
+{
+  return _attr == r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
+{
+  return _attr != r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
+{
+  return _attr < r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
+{
+  return _attr > r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
+{
+  return _attr <= r._attr;
+}
+
+PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
+{
+  return _attr >= r._attr;
+}
+
+// Returns the attribute that follows this one, or an empty handle.
+PUGI__FN xml_attribute xml_attribute::next_attribute() const
+{
+  if (!_attr) return xml_attribute();
+
+  return xml_attribute(_attr->next_attribute);
+}
+
+// Returns the attribute that precedes this one, or an empty handle.
+PUGI__FN xml_attribute xml_attribute::previous_attribute() const
+{
+  if (!_attr) return xml_attribute();
+
+  // the candidate is only returned if it has a successor; otherwise there is no previous attribute
+  xml_attribute_struct* candidate = _attr->prev_attribute_c;
+
+  return candidate->next_attribute ? xml_attribute(candidate) : xml_attribute();
+}
+
+// The as_* accessors return def when the handle is empty or the attribute has no value;
+// otherwise the value is returned, converted by the matching impl::get_value_* function.
+PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return _attr->value + 0;
+}
+
+PUGI__FN int xml_attribute::as_int(int def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_int(_attr->value);
+}
+
+PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_uint(_attr->value);
+}
+
+PUGI__FN double xml_attribute::as_double(double def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_double(_attr->value);
+}
+
+PUGI__FN float xml_attribute::as_float(float def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_float(_attr->value);
+}
+
+PUGI__FN bool xml_attribute::as_bool(bool def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_bool(_attr->value);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+PUGI__FN long long xml_attribute::as_llong(long long def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_llong(_attr->value);
+}
+
+PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
+{
+  if (!_attr || !_attr->value) return def;
+
+  return impl::get_value_ullong(_attr->value);
+}
+#endif
+
+// True if the handle does not refer to an attribute.
+PUGI__FN bool xml_attribute::empty() const
+{
+  return _attr == 0;
+}
+
+// Attribute name, or an empty string if the handle is empty or no name is set.
+PUGI__FN const char_t* xml_attribute::name() const
+{
+  if (!_attr || !_attr->name) return PUGIXML_TEXT("");
+
+  return _attr->name + 0;
+}
+
+// Attribute value, or an empty string if the handle is empty or no value is set.
+PUGI__FN const char_t* xml_attribute::value() const
+{
+  if (!_attr || !_attr->value) return PUGIXML_TEXT("");
+
+  return _attr->value + 0;
+}
+
+// Hash derived from the attribute's address divided by the structure size.
+PUGI__FN size_t xml_attribute::hash_value() const
+{
+  const uintptr_t address = reinterpret_cast<uintptr_t>(_attr);
+
+  return static_cast<size_t>(address / sizeof(xml_attribute_struct));
+}
+
+// Raw pointer to the underlying attribute structure (may be null).
+PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
+{
+  return _attr;
+}
+
+// Assignment from a value: each overload forwards to the matching set_value overload and returns
+// *this. The bool result of set_value is discarded, so a failed assignment is not reported.
+PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+
+PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
+{
+  set_value(rhs);
+  return *this;
+}
+#endif
+
+// Replaces the attribute name; returns false for an empty handle or if the copy fails.
+PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
+{
+  return _attr && impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
+}
+
+// Replaces the attribute value; returns false for an empty handle or if the copy fails.
+PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
+{
+  return _attr && impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
+}
+
+// Typed set_value overloads: the number or boolean is converted to text by impl::set_value_convert.
+// Each returns false for an empty handle, otherwise the result of the conversion.
+PUGI__FN bool xml_attribute::set_value(int rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(double rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(float rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(bool rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+PUGI__FN bool xml_attribute::set_value(long long rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
+{
+  return _attr && impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+#endif
+
+#ifdef __BORLANDC__
+// Borland-only overloads (see the enclosing #ifdef __BORLANDC__): combine an attribute handle with a bool
+// by first converting the handle to bool.
+PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
+{
+  return (bool)lhs && rhs;
+}
+
+PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
+{
+  return (bool)lhs || rhs;
+}
+#endif
+
+// Constructs an empty (null) node handle.
+PUGI__FN xml_node::xml_node(): _root(0)
+{
+}
+
+// Constructs a handle for the given node structure.
+PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
+{
+}
+
+// Dummy function whose address serves as the "true" value of the safe-bool conversion below.
+PUGI__FN static void unspecified_bool_xml_node(xml_node***)
+{
+}
+
+// Safe-bool conversion: non-null for a non-empty handle.
+PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
+{
+  if (!_root) return 0;
+
+  return unspecified_bool_xml_node;
+}
+
+// True for an empty handle.
+PUGI__FN bool xml_node::operator!() const
+{
+  return _root == 0;
+}
+
+// Iterator to the first child (equal to end() for an empty handle or a node without children).
+PUGI__FN xml_node::iterator xml_node::begin() const
+{
+  xml_node_struct* first = _root ? _root->first_child + 0 : 0;
+
+  return iterator(first, _root);
+}
+
+// Past-the-end child iterator.
+PUGI__FN xml_node::iterator xml_node::end() const
+{
+  return iterator(0, _root);
+}
+
+// Iterator to the first attribute (equal to attributes_end() if there is none).
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
+{
+  xml_attribute_struct* first = _root ? _root->first_attribute + 0 : 0;
+
+  return attribute_iterator(first, _root);
+}
+
+// Past-the-end attribute iterator.
+PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
+{
+  return attribute_iterator(0, _root);
+}
+
+// Range over all children.
+PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
+{
+  typedef xml_object_range<xml_node_iterator> range_type;
+
+  return range_type(begin(), end());
+}
+
+// Range over the children named name_, starting at the first such child.
+PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
+{
+  typedef xml_object_range<xml_named_node_iterator> range_type;
+
+  return range_type(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
+}
+
+// Range over all attributes.
+PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
+{
+  typedef xml_object_range<xml_attribute_iterator> range_type;
+
+  return range_type(attributes_begin(), attributes_end());
+}
+
+// All comparison operators compare the underlying node pointers of the two handles.
+PUGI__FN bool xml_node::operator==(const xml_node& r) const
+{
+  return _root == r._root;
+}
+
+PUGI__FN bool xml_node::operator!=(const xml_node& r) const
+{
+  return _root != r._root;
+}
+
+PUGI__FN bool xml_node::operator<(const xml_node& r) const
+{
+  return _root < r._root;
+}
+
+PUGI__FN bool xml_node::operator>(const xml_node& r) const
+{
+  return _root > r._root;
+}
+
+PUGI__FN bool xml_node::operator<=(const xml_node& r) const
+{
+  return _root <= r._root;
+}
+
+PUGI__FN bool xml_node::operator>=(const xml_node& r) const
+{
+  return _root >= r._root;
+}
+
+// True if the handle does not refer to a node.
+PUGI__FN bool xml_node::empty() const
+{
+  return _root == 0;
+}
+
+// Node name, or an empty string if the handle is empty or no name is set.
+PUGI__FN const char_t* xml_node::name() const
+{
+  if (!_root || !_root->name) return PUGIXML_TEXT("");
+
+  return _root->name + 0;
+}
+
+// Node type; node_null for an empty handle.
+PUGI__FN xml_node_type xml_node::type() const
+{
+  if (!_root) return node_null;
+
+  return PUGI__NODETYPE(_root);
+}
+
+// Node value, or an empty string if the handle is empty or no value is set.
+PUGI__FN const char_t* xml_node::value() const
+{
+  if (!_root || !_root->value) return PUGIXML_TEXT("");
+
+  return _root->value + 0;
+}
+
+// Returns the first child whose name equals name_, or an empty handle.
+PUGI__FN xml_node xml_node::child(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* cur = _root->first_child;
+
+  while (cur) {
+    // children without a name never match
+    if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+    cur = cur->next_sibling;
+  }
+
+  return xml_node();
+}
+
+// Returns the first attribute whose name equals name_, or an empty handle.
+PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
+{
+  if (!_root) return xml_attribute();
+
+  xml_attribute_struct* cur = _root->first_attribute;
+
+  while (cur) {
+    // attributes without a name never match
+    if (cur->name && impl::strequal(name_, cur->name))
+      return xml_attribute(cur);
+
+    cur = cur->next_attribute;
+  }
+
+  return xml_attribute();
+}
+
+// Returns the closest following sibling whose name equals name_, or an empty handle.
+PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* cur = _root->next_sibling;
+
+  while (cur) {
+    if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+    cur = cur->next_sibling;
+  }
+
+  return xml_node();
+}
+
+// Returns the following sibling, or an empty handle.
+PUGI__FN xml_node xml_node::next_sibling() const
+{
+  if (!_root) return xml_node();
+
+  return xml_node(_root->next_sibling);
+}
+
+// Returns the closest preceding sibling whose name equals name_, or an empty handle.
+PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* cur = _root->prev_sibling_c;
+
+  // the backwards walk stops at the first node that has no next sibling
+  while (cur->next_sibling) {
+    if (cur->name && impl::strequal(name_, cur->name)) return xml_node(cur);
+
+    cur = cur->prev_sibling_c;
+  }
+
+  return xml_node();
+}
+
+// Looks up the attribute named name_, starting the search at hint_ and wrapping around to the
+// first attribute. On success hint_ is advanced to the attribute after the match, which speeds
+// up lookups of consecutive attributes. Returns an empty handle if nothing matches.
+PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
+{
+  xml_attribute_struct* const start = hint_._attr;
+
+  // if hint is not an attribute of node, behavior is not defined
+  assert(!start || (_root && impl::is_attribute_of(start, _root)));
+
+  if (!_root) return xml_attribute();
+
+  // first pass: from the hint up to the end of the list
+  xml_attribute_struct* cur = start;
+
+  while (cur) {
+    if (cur->name && impl::strequal(name_, cur->name)) {
+      hint_._attr = cur->next_attribute;
+
+      return xml_attribute(cur);
+    }
+
+    cur = cur->next_attribute;
+  }
+
+  // second pass: from the first attribute up to the hint
+  // the null check on cur is technically redundant, but it prevents a crash in case the assertion above fails
+  cur = _root->first_attribute;
+
+  while (cur && cur != start) {
+    if (cur->name && impl::strequal(name_, cur->name)) {
+      hint_._attr = cur->next_attribute;
+
+      return xml_attribute(cur);
+    }
+
+    cur = cur->next_attribute;
+  }
+
+  return xml_attribute();
+}
+
+// Returns the node reached through prev_sibling_c, or an empty node when this handle is empty
+// or when that node has no next sibling.
+PUGI__FN xml_node xml_node::previous_sibling() const
+{
+  if (!_root) return xml_node();
+
+  xml_node_struct* candidate = _root->prev_sibling_c;
+
+  // a candidate without a next sibling is not reported as a previous sibling
+  return candidate->next_sibling ? xml_node(candidate) : xml_node();
+}
+
+// Returns a handle to this node's parent link; an empty handle yields an empty node.
+PUGI__FN xml_node xml_node::parent() const
+{
+  if (!_root) return xml_node();
+
+  return xml_node(_root->parent);
+}
+
+// Returns a handle to the document structure obtained from impl::get_document; an empty handle yields an empty node.
+PUGI__FN xml_node xml_node::root() const
+{
+  if (!_root) return xml_node();
+
+  return xml_node(&impl::get_document(_root));
+}
+
+// Wraps this node's internal pointer in an xml_text object.
+PUGI__FN xml_text xml_node::text() const
+{
+  xml_text wrapper(_root);
+
+  return wrapper;
+}
+
+// Returns the value of the first child that is a text node with a non-null value.
+// Returns an empty string when the handle is empty or no such child exists.
+PUGI__FN const char_t* xml_node::child_value() const
+{
+  if (!_root) return PUGIXML_TEXT("");
+
+  xml_node_struct* cur = _root->first_child;
+
+  while (cur) {
+    if (impl::is_text_node(cur) && cur->value) return cur->value;
+
+    cur = cur->next_sibling;
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Returns child_value() of the node produced by child(name_).
+PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
+{
+  xml_node named = child(name_);
+
+  return named.child_value();
+}
+
+// Returns a handle to the first_attribute link; an empty node handle yields an empty attribute.
+PUGI__FN xml_attribute xml_node::first_attribute() const
+{
+  if (!_root) return xml_attribute();
+
+  return xml_attribute(_root->first_attribute);
+}
+
+// Returns the attribute reached through the first attribute's prev_attribute_c link.
+// Returns an empty attribute when the handle is empty or the node has no attributes.
+PUGI__FN xml_attribute xml_node::last_attribute() const
+{
+  if (!_root || !_root->first_attribute) return xml_attribute();
+
+  return xml_attribute(_root->first_attribute->prev_attribute_c);
+}
+
+// Returns a handle to the first_child link; an empty handle yields an empty node.
+PUGI__FN xml_node xml_node::first_child() const
+{
+  if (!_root) return xml_node();
+
+  return xml_node(_root->first_child);
+}
+
+// Returns the node reached through the first child's prev_sibling_c link.
+// Returns an empty node when the handle is empty or the node has no children.
+PUGI__FN xml_node xml_node::last_child() const
+{
+  if (!_root || !_root->first_child) return xml_node();
+
+  return xml_node(_root->first_child->prev_sibling_c);
+}
+
+// Copies rhs into the node name. Only element, processing-instruction and declaration nodes accept a name;
+// any other node type (including an empty handle) returns false.
+PUGI__FN bool xml_node::set_name(const char_t* rhs)
+{
+  if (!_root) return false;
+
+  switch (PUGI__NODETYPE(_root)) {
+  case node_element:
+  case node_pi:
+  case node_declaration:
+    return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
+
+  default:
+    return false;
+  }
+}
+
+// Copies rhs into the node value. Only pcdata, cdata, comment, processing-instruction and doctype nodes
+// accept a value; any other node type (including an empty handle) returns false.
+PUGI__FN bool xml_node::set_value(const char_t* rhs)
+{
+  if (!_root) return false;
+
+  switch (PUGI__NODETYPE(_root)) {
+  case node_pcdata:
+  case node_cdata:
+  case node_comment:
+  case node_pi:
+  case node_doctype:
+    return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
+
+  default:
+    return false;
+  }
+}
+
+// Allocates a new attribute, appends it to this node's attribute list and names it name_.
+// Returns an empty attribute when this node type cannot hold attributes or when allocation fails.
+PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::append_attribute(raw, _root);
+
+  xml_attribute created(raw);
+  created.set_name(name_);
+
+  return created;
+}
+
+// Allocates a new attribute, prepends it to this node's attribute list and names it name_.
+// Returns an empty attribute when this node type cannot hold attributes or when allocation fails.
+PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
+{
+  if (!impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::prepend_attribute(raw, _root);
+
+  xml_attribute created(raw);
+  created.set_name(name_);
+
+  return created;
+}
+
+// Allocates a new attribute named name_ and links it directly after attr.
+// Returns an empty attribute when this node type cannot hold attributes, when attr is empty
+// or not an attribute of this node, or when allocation fails.
+PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
+{
+  if (!impl::allow_insert_attribute(type()) || !attr || !impl::is_attribute_of(attr._attr, _root))
+    return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::insert_attribute_after(raw, attr._attr, _root);
+
+  xml_attribute created(raw);
+  created.set_name(name_);
+
+  return created;
+}
+
+// Allocates a new attribute named name_ and links it directly before attr.
+// Returns an empty attribute when this node type cannot hold attributes, when attr is empty
+// or not an attribute of this node, or when allocation fails.
+PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
+{
+  if (!impl::allow_insert_attribute(type()) || !attr || !impl::is_attribute_of(attr._attr, _root))
+    return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::insert_attribute_before(raw, attr._attr, _root);
+
+  xml_attribute created(raw);
+  created.set_name(name_);
+
+  return created;
+}
+
+// Appends a new attribute to this node and fills it from proto via impl::node_copy_attribute.
+// Returns an empty attribute when proto is empty, this node type cannot hold attributes, or allocation fails.
+PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
+{
+  if (!proto || !impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::append_attribute(raw, _root);
+  impl::node_copy_attribute(raw, proto._attr);
+
+  return xml_attribute(raw);
+}
+
+// Prepends a new attribute to this node and fills it from proto via impl::node_copy_attribute.
+// Returns an empty attribute when proto is empty, this node type cannot hold attributes, or allocation fails.
+PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
+{
+  if (!proto || !impl::allow_insert_attribute(type())) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::prepend_attribute(raw, _root);
+  impl::node_copy_attribute(raw, proto._attr);
+
+  return xml_attribute(raw);
+}
+
+// Inserts a new attribute directly after attr and fills it from proto via impl::node_copy_attribute.
+// Returns an empty attribute when proto is empty, this node type cannot hold attributes,
+// attr is empty or not an attribute of this node, or allocation fails.
+PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
+{
+  if (!proto || !impl::allow_insert_attribute(type())) return xml_attribute();
+  if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::insert_attribute_after(raw, attr._attr, _root);
+  impl::node_copy_attribute(raw, proto._attr);
+
+  return xml_attribute(raw);
+}
+
+// Inserts a new attribute directly before attr and fills it from proto via impl::node_copy_attribute.
+// Returns an empty attribute when proto is empty, this node type cannot hold attributes,
+// attr is empty or not an attribute of this node, or allocation fails.
+PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
+{
+  if (!proto || !impl::allow_insert_attribute(type())) return xml_attribute();
+  if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_attribute();
+
+  xml_attribute_struct* raw = impl::allocate_attribute(allocator);
+  if (!raw) return xml_attribute();
+
+  impl::insert_attribute_before(raw, attr._attr, _root);
+  impl::node_copy_attribute(raw, proto._attr);
+
+  return xml_attribute(raw);
+}
+
+// Allocates a node of type type_ and appends it to this node's children.
+// Returns an empty node when impl::allow_insert_child rejects the combination or allocation fails.
+PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, type_);
+  if (!raw) return xml_node();
+
+  impl::append_node(raw, _root);
+
+  xml_node created(raw);
+
+  // declaration nodes are given the name "xml" right away
+  if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+  return created;
+}
+
+// Allocates a node of type type_ and prepends it to this node's children.
+// Returns an empty node when impl::allow_insert_child rejects the combination or allocation fails.
+PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
+{
+  if (!impl::allow_insert_child(type(), type_)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, type_);
+  if (!raw) return xml_node();
+
+  impl::prepend_node(raw, _root);
+
+  xml_node created(raw);
+
+  // declaration nodes are given the name "xml" right away
+  if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+  return created;
+}
+
+// Allocates a node of type type_ and links it directly before node, which must be a child of this node.
+// Returns an empty node when the type is not allowed, node is empty or not a child of this node, or allocation fails.
+PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
+{
+  if (!impl::allow_insert_child(type(), type_) || !node._root || node._root->parent != _root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, type_);
+  if (!raw) return xml_node();
+
+  impl::insert_node_before(raw, node._root);
+
+  xml_node created(raw);
+
+  // declaration nodes are given the name "xml" right away
+  if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+  return created;
+}
+
+// Allocates a node of type type_ and links it directly after node, which must be a child of this node.
+// Returns an empty node when the type is not allowed, node is empty or not a child of this node, or allocation fails.
+PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
+{
+  if (!impl::allow_insert_child(type(), type_) || !node._root || node._root->parent != _root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, type_);
+  if (!raw) return xml_node();
+
+  impl::insert_node_after(raw, node._root);
+
+  xml_node created(raw);
+
+  // declaration nodes are given the name "xml" right away
+  if (type_ == node_declaration) created.set_name(PUGIXML_TEXT("xml"));
+
+  return created;
+}
+
+// Appends an element child and assigns it the given name; the result of set_name is not checked.
+PUGI__FN xml_node xml_node::append_child(const char_t* name_)
+{
+  xml_node element = append_child(node_element);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Prepends an element child and assigns it the given name; the result of set_name is not checked.
+PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
+{
+  xml_node element = prepend_child(node_element);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Inserts an element child after node and assigns it the given name; the result of set_name is not checked.
+PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
+{
+  xml_node element = insert_child_after(node_element, node);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Inserts an element child before node and assigns it the given name; the result of set_name is not checked.
+PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
+{
+  xml_node element = insert_child_before(node_element, node);
+  element.set_name(name_);
+
+  return element;
+}
+
+// Appends a new child of proto's type and fills it from proto via impl::node_copy_tree.
+// Returns an empty node when a child of that type is not allowed here or allocation fails.
+PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
+{
+  const xml_node_type proto_type = proto.type();
+  if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, proto_type);
+  if (!raw) return xml_node();
+
+  impl::append_node(raw, _root);
+  impl::node_copy_tree(raw, proto._root);
+
+  return xml_node(raw);
+}
+
+// Prepends a new child of proto's type and fills it from proto via impl::node_copy_tree.
+// Returns an empty node when a child of that type is not allowed here or allocation fails.
+PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
+{
+  const xml_node_type proto_type = proto.type();
+  if (!impl::allow_insert_child(type(), proto_type)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, proto_type);
+  if (!raw) return xml_node();
+
+  impl::prepend_node(raw, _root);
+  impl::node_copy_tree(raw, proto._root);
+
+  return xml_node(raw);
+}
+
+// Inserts a new child of proto's type directly after node and fills it from proto via impl::node_copy_tree.
+// Returns an empty node when the type is not allowed, node is empty or not a child of this node, or allocation fails.
+PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
+{
+  const xml_node_type proto_type = proto.type();
+
+  if (!impl::allow_insert_child(type(), proto_type) || !node._root || node._root->parent != _root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, proto_type);
+  if (!raw) return xml_node();
+
+  impl::insert_node_after(raw, node._root);
+  impl::node_copy_tree(raw, proto._root);
+
+  return xml_node(raw);
+}
+
+// Inserts a new child of proto's type directly before node and fills it from proto via impl::node_copy_tree.
+// Returns an empty node when the type is not allowed, node is empty or not a child of this node, or allocation fails.
+PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
+{
+  const xml_node_type proto_type = proto.type();
+
+  if (!impl::allow_insert_child(type(), proto_type) || !node._root || node._root->parent != _root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  xml_node_struct* raw = impl::allocate_node(allocator, proto_type);
+  if (!raw) return xml_node();
+
+  impl::insert_node_before(raw, node._root);
+  impl::node_copy_tree(raw, proto._root);
+
+  return xml_node(raw);
+}
+
+// Unlinks moved from its current position and appends it to this node's children.
+// Returns an empty node when impl::allow_move rejects the move or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes changes document order without changing buffer pointers,
+  // so the document_buffer_order optimization has to be switched off
+  impl::xml_document_struct& doc = impl::get_document(_root);
+  doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::append_node(subject, _root);
+
+  return moved;
+}
+
+// Unlinks moved from its current position and prepends it to this node's children.
+// Returns an empty node when impl::allow_move rejects the move or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
+{
+  if (!impl::allow_move(*this, moved)) return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes changes document order without changing buffer pointers,
+  // so the document_buffer_order optimization has to be switched off
+  impl::xml_document_struct& doc = impl::get_document(_root);
+  doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::prepend_node(subject, _root);
+
+  return moved;
+}
+
+// Unlinks moved from its current position and links it directly after node.
+// Returns an empty node when impl::allow_move rejects the move, node is empty or not a child of this node,
+// moved and node are the same node, or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
+{
+  if (!impl::allow_move(*this, moved) || !node._root || node._root->parent != _root || moved._root == node._root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes changes document order without changing buffer pointers,
+  // so the document_buffer_order optimization has to be switched off
+  impl::xml_document_struct& doc = impl::get_document(_root);
+  doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::insert_node_after(subject, node._root);
+
+  return moved;
+}
+
+// Unlinks moved from its current position and links it directly before node.
+// Returns an empty node when impl::allow_move rejects the move, node is empty or not a child of this node,
+// moved and node are the same node, or the allocator cannot reserve.
+PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
+{
+  if (!impl::allow_move(*this, moved) || !node._root || node._root->parent != _root || moved._root == node._root)
+    return xml_node();
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return xml_node();
+
+  // moving nodes changes document order without changing buffer pointers,
+  // so the document_buffer_order optimization has to be switched off
+  impl::xml_document_struct& doc = impl::get_document(_root);
+  doc.header |= impl::xml_memory_page_contents_shared_mask;
+
+  xml_node_struct* subject = moved._root;
+
+  impl::remove_node(subject);
+  impl::insert_node_before(subject, node._root);
+
+  return moved;
+}
+
+// Looks up the attribute called name_ and forwards it to the handle-based remove_attribute overload.
+PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
+{
+  xml_attribute target = attribute(name_);
+
+  return remove_attribute(target);
+}
+
+// Unlinks attribute a from this node and destroys it.
+// Returns false when either handle is empty, a is not an attribute of this node, or the allocator cannot reserve.
+PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
+{
+  if (!_root || !a._attr || !impl::is_attribute_of(a._attr, _root)) return false;
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return false;
+
+  xml_attribute_struct* victim = a._attr;
+
+  impl::remove_attribute(victim, _root);
+  impl::destroy_attribute(victim, allocator);
+
+  return true;
+}
+
+// Looks up the child called name_ and forwards it to the handle-based remove_child overload.
+PUGI__FN bool xml_node::remove_child(const char_t* name_)
+{
+  xml_node target = child(name_);
+
+  return remove_child(target);
+}
+
+// Unlinks child n from this node and destroys it.
+// Returns false when either handle is empty, n is not a child of this node, or the allocator cannot reserve.
+PUGI__FN bool xml_node::remove_child(const xml_node& n)
+{
+  if (!_root) return false;
+  if (!n._root || n._root->parent != _root) return false;
+
+  impl::xml_allocator& allocator = impl::get_allocator(_root);
+  if (!allocator.reserve()) return false;
+
+  xml_node_struct* victim = n._root;
+
+  impl::remove_node(victim);
+  impl::destroy_node(victim, allocator);
+
+  return true;
+}
+
+// Parses the given buffer with impl::load_buffer_impl, using this node as the parse root.
+// Returns status_append_invalid_root when this node cannot accept element children, and
+// status_out_of_memory when the bookkeeping record for the fragment buffer cannot be allocated;
+// otherwise returns whatever impl::load_buffer_impl reports.
+PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+  // append_buffer is only valid for elements/documents
+  if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
+
+  // get document node
+  impl::xml_document_struct* doc = &impl::get_document(_root);
+
+  // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
+  doc->header |= impl::xml_memory_page_contents_shared_mask;
+
+  // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
+  impl::xml_memory_page* page = 0;
+  impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
+  // the page output is not needed here; the cast only marks it as intentionally unused
+  (void)page;
+
+  if (!extra) return impl::make_parse_result(status_out_of_memory);
+
+  // add extra buffer to the list
+  // (the record is pushed at the head of doc->extra_buffers; its buffer pointer is handed to the loader below)
+  extra->buffer = 0;
+  extra->next = doc->extra_buffers;
+  doc->extra_buffers = extra;
+
+  // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
+  impl::name_null_sentry sentry(_root);
+
+  return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
+}
+
+// Returns the first child named name_ that carries an attribute attr_name whose value equals attr_value.
+// Returns an empty node when the handle is empty or no child matches.
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
+{
+  if (!_root) return xml_node();
+
+  for (xml_node_struct* kid = _root->first_child; kid; kid = kid->next_sibling) {
+    // only children with a matching name are inspected
+    if (!kid->name || !impl::strequal(name_, kid->name)) continue;
+
+    for (xml_attribute_struct* attr = kid->first_attribute; attr; attr = attr->next_attribute) {
+      if (!attr->name || !impl::strequal(attr_name, attr->name)) continue;
+
+      // a missing attribute value is compared as an empty string
+      if (impl::strequal(attr_value, attr->value ? attr->value + 0 : PUGIXML_TEXT("")))
+        return xml_node(kid);
+    }
+  }
+
+  return xml_node();
+}
+
+// Returns the first child (of any name) that carries an attribute attr_name whose value equals attr_value.
+// Returns an empty node when the handle is empty or no child matches.
+PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
+{
+  if (!_root) return xml_node();
+
+  for (xml_node_struct* kid = _root->first_child; kid; kid = kid->next_sibling) {
+    for (xml_attribute_struct* attr = kid->first_attribute; attr; attr = attr->next_attribute) {
+      if (!attr->name || !impl::strequal(attr_name, attr->name)) continue;
+
+      // a missing attribute value is compared as an empty string
+      if (impl::strequal(attr_value, attr->value ? attr->value + 0 : PUGIXML_TEXT("")))
+        return xml_node(kid);
+    }
+  }
+
+  return xml_node();
+}
+
+#ifndef PUGIXML_NO_STL
+// Builds a string from the names of this node and all of its ancestors, outermost ancestor first,
+// with delimiter written between consecutive levels. An empty handle yields an empty string.
+PUGI__FN string_t xml_node::path(char_t delimiter) const
+{
+  if (!_root) return string_t();
+
+  // first pass: measure the total length (one delimiter per ancestor plus every name)
+  size_t total = 0;
+
+  for (xml_node_struct* up = _root; up; up = up->parent) {
+    if (up != _root) ++total;
+    if (up->name) total += impl::strlength(up->name);
+  }
+
+  string_t result;
+  result.resize(total);
+
+  // second pass: fill the string from the back, walking from this node towards the top
+  size_t pos = total;
+
+  for (xml_node_struct* cur = _root; cur; cur = cur->parent) {
+    if (cur != _root) {
+      --pos;
+      result[pos] = delimiter;
+    }
+
+    if (cur->name && *cur->name) {
+      size_t name_length = impl::strlength(cur->name);
+
+      pos -= name_length;
+      memcpy(&result[pos], cur->name, name_length * sizeof(char_t));
+    }
+  }
+
+  // every measured character must have been written
+  assert(pos == 0);
+
+  return result;
+}
+#endif
+
+// Resolves a delimiter-separated path relative to this node (or to root() when the path starts with the
+// delimiter) and returns the first matching node, or an empty node when nothing matches.
+// "." segments stay on the current node and ".." segments move to the parent.
+// An empty handle, a null path or an empty path returns this node unchanged.
+PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
+{
+  xml_node found = *this; // Current search context.
+
+  if (!_root || !path_ || !path_[0]) return found;
+
+  if (path_[0] == delimiter) {
+    // Absolute path; e.g. '/foo/bar'
+    found = found.root();
+    ++path_;
+  }
+
+  const char_t* path_segment = path_;
+
+  // skip any run of delimiters in front of the segment
+  while (*path_segment == delimiter) ++path_segment;
+
+  const char_t* path_segment_end = path_segment;
+
+  // the segment ends at the next delimiter or at the end of the string
+  while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
+
+  // nothing left to match: the current context is the result
+  if (path_segment == path_segment_end) return found;
+
+  const char_t* next_segment = path_segment_end;
+
+  // skip delimiters so that next_segment points at the start of the remaining path
+  while (*next_segment == delimiter) ++next_segment;
+
+  // the remaining path is resolved by a recursive call on the chosen node
+  if (*path_segment == '.' && path_segment + 1 == path_segment_end)
+    return found.first_element_by_path(next_segment, delimiter);
+  else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
+    return found.parent().first_element_by_path(next_segment, delimiter);
+  else {
+    // try every child whose name equals the segment; the first one whose subtree resolves the rest wins
+    for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) {
+      if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) {
+        xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
+
+        if (subsearch) return subsearch;
+      }
+    }
+
+    return xml_node();
+  }
+}
+
+// Walks the subtree below this node without recursion, calling walker.begin() on this node, walker.for_each()
+// on every descendant (a node is visited before its children) and walker.end() on this node.
+// walker._depth is kept in sync with the nesting level of the node being visited (0 for direct children).
+// Returns false as soon as begin() or for_each() returns false; otherwise returns the result of end().
+PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
+{
+  walker._depth = -1;
+
+  xml_node arg_begin = *this;
+  if (!walker.begin(arg_begin)) return false;
+
+  xml_node cur = first_child();
+
+  if (cur) {
+    ++walker._depth;
+
+    do {
+      xml_node arg_for_each = cur;
+      if (!walker.for_each(arg_for_each))
+        return false;
+
+      // descend first, then move sideways, and only then climb back up
+      if (cur.first_child()) {
+        ++walker._depth;
+        cur = cur.first_child();
+      } else if (cur.next_sibling())
+        cur = cur.next_sibling();
+      else {
+        // Borland C++ workaround
+        // climb until an ancestor with a next sibling is found or the traversal root is reached
+        while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) {
+          --walker._depth;
+          cur = cur.parent();
+        }
+
+        if (cur != *this)
+          cur = cur.next_sibling();
+      }
+    } while (cur && cur != *this);
+  }
+
+  // the depth counter must be back at its initial value once the walk is complete
+  assert(walker._depth == -1);
+
+  xml_node arg_end = *this;
+  return walker.end(arg_end);
+}
+
+// Derives a hash from the internal node pointer by dividing its integer value by sizeof(xml_node_struct).
+PUGI__FN size_t xml_node::hash_value() const
+{
+  uintptr_t address = reinterpret_cast<uintptr_t>(_root);
+
+  return static_cast<size_t>(address / sizeof(xml_node_struct));
+}
+
+// Exposes the raw node pointer wrapped by this handle (null for an empty handle).
+PUGI__FN xml_node_struct* xml_node::internal_object() const
+{
+  return _root;
+}
+
+// Serializes this node to writer through a buffered writer using impl::node_output and flushes the buffer.
+// Does nothing for an empty handle.
+PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+  if (_root) {
+    impl::xml_buffered_writer out(writer, encoding);
+
+    impl::node_output(out, _root, indent, flags, depth);
+
+    out.flush();
+  }
+}
+
+#ifndef PUGIXML_NO_STL
+// Serializes this node to a narrow-character stream by wrapping it in an xml_writer_stream.
+PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
+{
+  xml_writer_stream sink(stream);
+
+  print(sink, indent, flags, encoding, depth);
+}
+
+// Serializes this node to a wide-character stream by wrapping it in an xml_writer_stream;
+// the encoding passed on is always encoding_wchar.
+PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
+{
+  xml_writer_stream sink(stream);
+
+  print(sink, indent, flags, encoding_wchar, depth);
+}
+#endif
+
+// Returns the distance between the document's parse buffer and this node's name (element, declaration, pi)
+// or value (pcdata, cdata, comment, doctype); the document node itself reports 0.
+// Returns -1 when the handle is empty, when the offset cannot be determined, or for any other node type.
+PUGI__FN ptrdiff_t xml_node::offset_debug() const
+{
+  if (!_root) return -1;
+
+  impl::xml_document_struct& doc = impl::get_document(_root);
+
+  // the offset is only reliable when there is exactly one parse buffer
+  if (!doc.buffer || doc.extra_buffers) return -1;
+
+  switch (type()) {
+  case node_document:
+    return 0;
+
+  case node_element:
+  case node_declaration:
+  case node_pi:
+    // the name must be present and carry none of the allocated/shared flags
+    if (_root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0)
+      return _root->name - doc.buffer;
+
+    return -1;
+
+  case node_pcdata:
+  case node_cdata:
+  case node_comment:
+  case node_doctype:
+    // the value must be present and carry none of the allocated/shared flags
+    if (_root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0)
+      return _root->value - doc.buffer;
+
+    return -1;
+
+  default:
+    return -1;
+  }
+}
+
+#ifdef __BORLANDC__
+// Logical AND between a node handle (converted to bool) and a bool; only compiled for Borland C++.
+PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
+{
+  return (bool)lhs && rhs;
+}
+
+// Logical OR between a node handle (converted to bool) and a bool; only compiled for Borland C++.
+PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
+{
+  return (bool)lhs || rhs;
+}
+#endif
+
+// Constructs a text handle that stores the given node pointer.
+PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
+{
+}
+
+// Locates the node that holds the text: the stored node itself when it is null or a text node,
+// otherwise its first child that is a text node. Returns 0 when no such child exists.
+PUGI__FN xml_node_struct* xml_text::_data() const
+{
+  if (!_root || impl::is_text_node(_root)) return _root;
+
+  xml_node_struct* cur = _root->first_child;
+
+  while (cur) {
+    if (impl::is_text_node(cur)) return cur;
+
+    cur = cur->next_sibling;
+  }
+
+  return 0;
+}
+
+// Returns the node found by _data(); when there is none, appends a new pcdata child to the stored node
+// and returns its internal pointer (which is whatever append_child produced).
+PUGI__FN xml_node_struct* xml_text::_data_new()
+{
+  xml_node_struct* existing = _data();
+
+  return existing ? existing : xml_node(_root).append_child(node_pcdata).internal_object();
+}
+
+// Constructs an empty text handle (the stored node pointer is null).
+PUGI__FN xml_text::xml_text(): _root(0)
+{
+}
+
+// Empty function whose address is returned by xml_text's bool-like conversion operator as the "true" value.
+PUGI__FN static void unspecified_bool_xml_text(xml_text***)
+{
+}
+
+// Bool-like conversion: yields a non-null function pointer when _data() finds a text node, null otherwise.
+PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
+{
+  if (_data()) return unspecified_bool_xml_text;
+
+  return 0;
+}
+
+// True when _data() finds no text node.
+PUGI__FN bool xml_text::operator!() const
+{
+  return _data() == 0;
+}
+
+// True when _data() finds no text node.
+PUGI__FN bool xml_text::empty() const
+{
+  return !_data();
+}
+
+// Returns the text node's value, or an empty string when there is no text node or it has no value.
+PUGI__FN const char_t* xml_text::get() const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return PUGIXML_TEXT("");
+
+  return holder->value + 0;
+}
+
+// Returns the text node's value, or def when there is no text node or it has no value.
+PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return holder->value + 0;
+}
+
+// Converts the text value with impl::get_value_int; returns def when there is no text node or no value.
+PUGI__FN int xml_text::as_int(int def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_int(holder->value);
+}
+
+// Converts the text value with impl::get_value_uint; returns def when there is no text node or no value.
+PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_uint(holder->value);
+}
+
+// Converts the text value with impl::get_value_double; returns def when there is no text node or no value.
+PUGI__FN double xml_text::as_double(double def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_double(holder->value);
+}
+
+// Converts the text value with impl::get_value_float; returns def when there is no text node or no value.
+PUGI__FN float xml_text::as_float(float def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_float(holder->value);
+}
+
+// Converts the text value with impl::get_value_bool; returns def when there is no text node or no value.
+PUGI__FN bool xml_text::as_bool(bool def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_bool(holder->value);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Converts the text value with impl::get_value_llong; returns def when there is no text node or no value.
+PUGI__FN long long xml_text::as_llong(long long def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_llong(holder->value);
+}
+
+// Converts the text value with impl::get_value_ullong; returns def when there is no text node or no value.
+PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
+{
+  xml_node_struct* holder = _data();
+
+  if (!holder || !holder->value) return def;
+
+  return impl::get_value_ullong(holder->value);
+}
+#endif
+
+// Copies the string rhs into the text node obtained from _data_new(); returns false when no node is available.
+PUGI__FN bool xml_text::set(const char_t* rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::strcpy_insitu(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
+}
+
+// Stores the int rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(int rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores the unsigned int rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(unsigned int rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores the float rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(float rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores the double rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(double rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores the bool rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(bool rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Stores the long long rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(long long rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+
+// Stores the unsigned long long rhs via impl::set_value_convert in the text node obtained from _data_new();
+// returns false when no node is available.
+PUGI__FN bool xml_text::set(unsigned long long rhs)
+{
+  xml_node_struct* target = _data_new();
+
+  if (!target) return false;
+
+  return impl::set_value_convert(target->value, target->header, impl::xml_memory_page_value_allocated_mask, rhs);
+}
+#endif
+
+// Assigns a string through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns an int through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(int rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns an unsigned int through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns a double through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(double rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns a float through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(float rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns a bool through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(bool rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+#ifdef PUGIXML_HAS_LONG_LONG
+// Assigns a long long through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(long long rhs)
+{
+  set(rhs);
+  return *this;
+}
+
+// Assigns an unsigned long long through set(); the success flag returned by set() is discarded.
+PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
+{
+  set(rhs);
+  return *this;
+}
+#endif
+
+// Returns a node handle for the text node found by _data() (an empty node when there is none).
+PUGI__FN xml_node xml_text::data() const
+{
+  xml_node_struct* holder = _data();
+
+  return xml_node(holder);
+}
+
+#ifdef __BORLANDC__
+// Logical AND between a text handle (converted to bool) and a bool; only compiled for Borland C++.
+PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
+{
+  return (bool)lhs && rhs;
+}
+
+// Logical OR between a text handle (converted to bool) and a bool; only compiled for Borland C++.
+PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
+{
+  return (bool)lhs || rhs;
+}
+#endif
+
+// Default constructor; both members are default-constructed.
+PUGI__FN xml_node_iterator::xml_node_iterator()
+{
+}
+
+// Constructs an iterator positioned at node; the parent is taken from node.parent().
+PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
+{
+}
+
+// Constructs an iterator from raw pointers to the current node and to its parent.
+PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+{
+}
+
+// Two iterators are equal when both the current node and the parent node pointers match.
+PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Two iterators differ when either the current node or the parent node pointers differ.
+PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Returns a reference to the current node handle; asserts that the iterator points at a node.
+PUGI__FN xml_node& xml_node_iterator::operator*() const
+{
+  assert(_wrap._root);
+  return _wrap;
+}
+
+// Returns a pointer to the current node handle; asserts that the iterator points at a node.
+PUGI__FN xml_node* xml_node_iterator::operator->() const
+{
+  assert(_wrap._root);
+  return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+}
+
+// Pre-increment: moves to the next sibling of the current node; asserts that the iterator points at a node.
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
+{
+  assert(_wrap._root);
+  _wrap._root = _wrap._root->next_sibling;
+  return *this;
+}
+
+// Post-increment: advances the iterator and returns a copy of its state before the advance.
+PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
+{
+  xml_node_iterator before(*this);
+
+  operator++();
+
+  return before;
+}
+
+// Pre-decrement: moves to the previous sibling, or to the parent's last child when the iterator
+// currently points at no node.
+PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
+{
+  if (_wrap._root)
+    _wrap = _wrap.previous_sibling();
+  else
+    _wrap = _parent.last_child();
+
+  return *this;
+}
+
+// Post-decrement: steps the iterator back and returns a copy of its state before the step.
+PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
+{
+  xml_node_iterator before(*this);
+
+  operator--();
+
+  return before;
+}
+
+// Default constructor; both members are default-constructed.
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
+{
+}
+
+// Constructs an iterator positioned at attr, remembering parent as the owning node.
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
+{
+}
+
+// Constructs an iterator from raw pointers to the current attribute and to the owning node.
+PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
+{
+}
+
+// Two iterators are equal when both the current attribute and the parent node pointers match.
+PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
+{
+  if (_wrap._attr != rhs._wrap._attr) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Two iterators differ when either the current attribute or the parent node pointers differ.
+PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
+{
+  if (_wrap._attr != rhs._wrap._attr) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Returns a reference to the current attribute handle; asserts that the iterator points at an attribute.
+PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
+{
+  assert(_wrap._attr);
+  return _wrap;
+}
+
+// Returns a pointer to the current attribute handle; asserts that the iterator points at an attribute.
+PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
+{
+  assert(_wrap._attr);
+  return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
+}
+
+// Pre-increment: moves to the next attribute; asserts that the iterator points at an attribute.
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
+{
+  assert(_wrap._attr);
+  _wrap._attr = _wrap._attr->next_attribute;
+  return *this;
+}
+
+// Post-increment: advances the iterator and returns a copy of its state before the advance.
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
+{
+  xml_attribute_iterator before(*this);
+
+  operator++();
+
+  return before;
+}
+
+// Pre-decrement: moves to the previous attribute, or to the parent's last attribute when the iterator
+// currently points at no attribute.
+PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
+{
+  if (_wrap._attr)
+    _wrap = _wrap.previous_attribute();
+  else
+    _wrap = _parent.last_attribute();
+
+  return *this;
+}
+
+// Post-decrement: steps the iterator back and returns a copy of its state before the step.
+PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
+{
+  xml_attribute_iterator before(*this);
+
+  operator--();
+
+  return before;
+}
+
+// Default constructor; the node members are default-constructed and the name filter is null.
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
+{
+}
+
+// Constructs an iterator positioned at node that filters by name; the parent is taken from node.parent().
+// The name pointer is stored as-is (no copy is made here).
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
+{
+}
+
+// Constructs an iterator from raw pointers to the current node and its parent plus the name filter.
+// The name pointer is stored as-is (no copy is made here).
+PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
+{
+}
+
+// Two iterators are equal when both the current node and the parent node pointers match;
+// the name filter does not take part in the comparison.
+PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return false;
+
+  return _parent._root == rhs._parent._root;
+}
+
+// Two iterators differ when either the current node or the parent node pointers differ;
+// the name filter does not take part in the comparison.
+PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
+{
+  if (_wrap._root != rhs._wrap._root) return true;
+
+  return _parent._root != rhs._parent._root;
+}
+
+// Returns a reference to the current node handle; asserts that the iterator points at a node.
+PUGI__FN xml_node& xml_named_node_iterator::operator*() const
+{
+  assert(_wrap._root);
+  return _wrap;
+}
+
+// Returns a pointer to the current node handle; asserts that the iterator points at a node.
+PUGI__FN xml_node* xml_named_node_iterator::operator->() const
+{
+  assert(_wrap._root);
+  return const_cast<xml_node*>(&_wrap); // BCC32 workaround
+}
+
+// Pre-increment: moves to the next sibling that next_sibling(_name) returns for the stored name;
+// asserts that the iterator points at a node.
+PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
+{
+  assert(_wrap._root);
+  _wrap = _wrap.next_sibling(_name);
+  return *this;
+}
+
+// Post-increment: advances the iterator and returns a copy of its previous state.
+PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
+{
+  xml_named_node_iterator previous(*this);
+
+  operator++();
+
+  return previous;
+}
+
+// Pre-decrement: moves to the result of previous_sibling(_name). An iterator without
+// a current node starts from the parent's last child and, if that child's name does
+// not equal _name, continues with previous_sibling(_name) from there.
+PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
+{
+  if (_wrap._root) {
+    _wrap = _wrap.previous_sibling(_name);
+    return *this;
+  }
+
+  _wrap = _parent.last_child();
+
+  const bool last_child_matches = impl::strequal(_wrap.name(), _name);
+
+  if (!last_child_matches)
+    _wrap = _wrap.previous_sibling(_name);
+
+  return *this;
+}
+
+// Post-decrement: steps back and returns a copy of the iterator's previous state.
+PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
+{
+  xml_named_node_iterator previous(*this);
+
+  operator--();
+
+  return previous;
+}
+
+// A default-constructed result reports status_internal_error at offset 0 with encoding_auto.
+PUGI__FN xml_parse_result::xml_parse_result()
+  : status(status_internal_error)
+  , offset(0)
+  , encoding(encoding_auto)
+{
+}
+
+// The result converts to true only when status is status_ok.
+PUGI__FN xml_parse_result::operator bool() const
+{
+  const bool succeeded = (status == status_ok);
+
+  return succeeded;
+}
+
+// Maps the stored status code to a human-readable English message.
+// Statuses without a dedicated message yield "Unknown error".
+PUGI__FN const char* xml_parse_result::description() const
+{
+  const char* message = "Unknown error";
+
+  switch (status) {
+  case status_ok:                   message = "No error"; break;
+
+  // I/O and resource failures
+  case status_file_not_found:       message = "File was not found"; break;
+  case status_io_error:             message = "Error reading from file/stream"; break;
+  case status_out_of_memory:        message = "Could not allocate memory"; break;
+  case status_internal_error:       message = "Internal error occurred"; break;
+
+  // parsing failures
+  case status_unrecognized_tag:     message = "Could not determine tag type"; break;
+  case status_bad_pi:               message = "Error parsing document declaration/processing instruction"; break;
+  case status_bad_comment:          message = "Error parsing comment"; break;
+  case status_bad_cdata:            message = "Error parsing CDATA section"; break;
+  case status_bad_doctype:          message = "Error parsing document type declaration"; break;
+  case status_bad_pcdata:           message = "Error parsing PCDATA section"; break;
+  case status_bad_start_element:    message = "Error parsing start element tag"; break;
+  case status_bad_attribute:        message = "Error parsing element attribute"; break;
+  case status_bad_end_element:      message = "Error parsing end element tag"; break;
+  case status_end_element_mismatch: message = "Start-end tags mismatch"; break;
+
+  case status_append_invalid_root:  message = "Unable to append nodes: root is not an element or document"; break;
+
+  case status_no_document_element:  message = "No document element found"; break;
+
+  default:
+    break;
+  }
+
+  return message;
+}
+
+// Constructs an empty document: no owned buffer, then create() sets up the root.
+PUGI__FN xml_document::xml_document()
+  : _buffer(0)
+{
+  create();
+}
+
+// Destructor: hands all cleanup to destroy().
+PUGI__FN xml_document::~xml_document()
+{
+  destroy();
+}
+
+// Returns the document to a freshly constructed state by tearing everything
+// down and creating the root again.
+PUGI__FN void xml_document::reset()
+{
+  destroy();
+
+  create();
+}
+
+// Resets the document, then appends a copy of every top-level child of `proto`.
+PUGI__FN void xml_document::reset(const xml_document& proto)
+{
+  reset();
+
+  xml_node child = proto.first_child();
+
+  while (child) {
+    append_copy(child);
+    child = child.next_sibling();
+  }
+}
+
+PUGI__FN void xml_document::create()
+{ /* Sets up the document root inside the object's own _memory buffer: an aligned
+   * xml_memory_page is constructed there and an xml_document_struct is
+   * placement-constructed right after the page header. Expects _root to be null on entry.
+   */
+  assert(!_root);
+
+#ifdef PUGIXML_COMPACT
+  const size_t page_offset = sizeof(uint32_t); // room for the compact page marker written below
+#else
+  const size_t page_offset = 0;
+#endif
+
+  // initialize sentinel page: compile-time check that page header + document struct (plus worst-case alignment slack) fit in _memory
+  PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory));
+
+  // align upwards to page boundary
+  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
+
+  // prepare page structure
+  impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
+  assert(page);
+
+  page->busy_size = impl::xml_memory_page_size; // NOTE(review): presumably marks the sentinel page as full so nothing else is allocated from it - confirm against xml_allocator
+
+  // setup first page marker
+#ifdef PUGIXML_COMPACT
+  // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+  page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
+  *page->compact_page_marker = sizeof(impl::xml_memory_page);
+#endif
+
+  // allocate new root (placement new directly after the page header and the optional marker)
+  _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
+  _root->prev_sibling_c = _root;
+
+  // setup sentinel page: the document struct acts as the page's allocator
+  page->allocator = static_cast<impl::xml_document_struct*>(_root);
+
+  // verify the document allocation stays within _memory
+  assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
+}
+
+// Releases everything the document owns: the parse buffer, the extra buffers and
+// every dynamically allocated memory page. The first page lives inside _memory
+// and is therefore left alone. Leaves _root null.
+PUGI__FN void xml_document::destroy()
+{
+  assert(_root);
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  // destroy static storage
+  if (_buffer) {
+    impl::xml_memory::deallocate(_buffer);
+    _buffer = 0;
+  }
+
+  // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
+  impl::xml_extra_buffer* extra = doc->extra_buffers;
+
+  while (extra) {
+    if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
+
+    extra = extra->next;
+  }
+
+  // destroy dynamic storage, leave sentinel page (it's in static memory)
+  impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
+  assert(root_page && !root_page->prev);
+  assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
+
+  impl::xml_memory_page* page = root_page->next;
+
+  while (page) {
+    // fetch the link first: the page is gone after deallocate_page
+    impl::xml_memory_page* following = page->next;
+
+    impl::xml_allocator::deallocate_page(page);
+
+    page = following;
+  }
+
+#ifdef PUGIXML_COMPACT
+  // destroy hash table
+  doc->hash.clear();
+#endif
+
+  _root = 0;
+}
+
+#ifndef PUGIXML_NO_STL
+// Resets the document and loads it from a narrow-character stream.
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_stream_impl(doc, stream, options, encoding, &_buffer);
+}
+
+// Resets the document and loads it from a wide-character stream; the encoding is
+// always encoding_wchar.
+PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
+{
+  reset();
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_stream_impl(doc, stream, options, encoding_wchar, &_buffer);
+}
+#endif
+
+// Loads the document from a null-terminated string of char_t, forwarding to
+// load_buffer() with the string's size in bytes and the native encoding.
+PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
+{
+  // Force native encoding (skip autodetection)
+#ifdef PUGIXML_WCHAR_MODE
+  const xml_encoding native_encoding = encoding_wchar;
+#else
+  const xml_encoding native_encoding = encoding_utf8;
+#endif
+
+  const size_t size_in_bytes = impl::strlength(contents) * sizeof(char_t);
+
+  return load_buffer(contents, size_in_bytes, options, native_encoding);
+}
+
+// String overload of load(): simply forwards to load_string().
+PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
+{
+  const xml_parse_result outcome = load_string(contents, options);
+
+  return outcome;
+}
+
+// Resets the document and loads it from the file at `path_`, opened in binary mode.
+// The FILE handle is closed by auto_deleter when the function returns.
+PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  using impl::auto_deleter; // MSVC7 workaround
+  auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose);
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_file_impl(doc, file.data, options, encoding, &_buffer);
+}
+
+// Wide-path variant of load_file(): the file is opened through impl::open_file_wide
+// in binary mode and closed by auto_deleter when the function returns.
+PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  using impl::auto_deleter; // MSVC7 workaround
+  auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose);
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_file_impl(doc, file.data, options, encoding, &_buffer);
+}
+
+// Resets the document and parses `size` bytes starting at `contents`.
+// Both boolean flags passed to load_buffer_impl are false for this variant.
+PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+  void* data = const_cast<void*>(contents);
+
+  return impl::load_buffer_impl(doc, _root, data, size, options, encoding, false, false, &_buffer);
+}
+
+// Resets the document and parses `size` bytes starting at `contents`.
+// Passes (true, false) as the two boolean flags of load_buffer_impl.
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, false, &_buffer);
+}
+
+// Resets the document and parses `size` bytes starting at `contents`.
+// Passes (true, true) as the two boolean flags of load_buffer_impl.
+PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
+{
+  reset();
+
+  impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
+
+  return impl::load_buffer_impl(doc, _root, contents, size, options, encoding, true, true, &_buffer);
+}
+
+// Serializes the document through `writer`. Output order: optional BOM, optional
+// XML declaration (only when the document has none of its own), then the node tree.
+PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+  impl::xml_buffered_writer buffered_writer(writer, encoding);
+
+  const bool bom_requested = (flags & format_write_bom) != 0;
+
+  if (bom_requested && encoding != encoding_latin1) {
+    // BOM always represents the codepoint U+FEFF, so just write it in native encoding
+#ifdef PUGIXML_WCHAR_MODE
+    unsigned int bom = 0xfeff;
+    buffered_writer.write(static_cast<wchar_t>(bom));
+#else
+    buffered_writer.write('\xef', '\xbb', '\xbf');
+#endif
+  }
+
+  const bool declaration_allowed = (flags & format_no_declaration) == 0;
+
+  if (declaration_allowed && !impl::has_declaration(_root)) {
+    buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
+
+    if (encoding == encoding_latin1)
+      buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
+
+    buffered_writer.write('?', '>');
+
+    if ((flags & format_raw) == 0)
+      buffered_writer.write('\n');
+  }
+
+  impl::node_output(buffered_writer, _root, indent, flags, 0);
+
+  buffered_writer.flush();
+}
+
+#ifndef PUGIXML_NO_STL
+// Saves the document to a narrow-character output stream via xml_writer_stream.
+PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+  xml_writer_stream stream_writer(stream);
+
+  save(stream_writer, indent, flags, encoding);
+}
+
+// Saves the document to a wide-character output stream; the encoding is always
+// encoding_wchar.
+PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
+{
+  xml_writer_stream stream_writer(stream);
+
+  save(stream_writer, indent, flags, encoding_wchar);
+}
+#endif
+
+// Saves the document to the file at `path_`. The file is opened in text mode when
+// format_save_file_text is set and in binary mode otherwise; auto_deleter closes it.
+PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+  const char* mode = (flags & format_save_file_text) ? "w" : "wb";
+
+  using impl::auto_deleter; // MSVC7 workaround
+  auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, mode), fclose);
+
+  return impl::save_file_impl(*this, file.data, indent, flags, encoding);
+}
+
+// Wide-path variant of save_file(): text mode when format_save_file_text is set,
+// binary mode otherwise; auto_deleter closes the file.
+PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
+{
+  const wchar_t* mode = (flags & format_save_file_text) ? L"w" : L"wb";
+
+  using impl::auto_deleter; // MSVC7 workaround
+  auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, mode), fclose);
+
+  return impl::save_file_impl(*this, file.data, indent, flags, encoding);
+}
+
+// Returns the first top-level child whose type is node_element, or an empty
+// node handle when there is none.
+PUGI__FN xml_node xml_document::document_element() const
+{
+  assert(_root);
+
+  xml_node_struct* child = _root->first_child;
+
+  while (child) {
+    if (PUGI__NODETYPE(child) == node_element)
+      return xml_node(child);
+
+    child = child->next_sibling;
+  }
+
+  return xml_node();
+}
+
+#ifndef PUGIXML_NO_STL
+// Converts a null-terminated wide string to UTF-8. `str` must not be null.
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
+{
+  assert(str);
+
+  const size_t length = impl::strlength_wide(str);
+
+  return impl::as_utf8_impl(str, length);
+}
+
+// Converts a wide std::basic_string to UTF-8.
+PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
+{
+  const wchar_t* data = str.c_str();
+  const size_t length = str.size();
+
+  return impl::as_utf8_impl(data, length);
+}
+
+// Converts a null-terminated narrow string to a wide string. `str` must not be null.
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
+{
+  assert(str);
+
+  const size_t length = strlen(str);
+
+  return impl::as_wide_impl(str, length);
+}
+
+// Converts a narrow std::string to a wide string.
+PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
+{
+  const char* data = str.c_str();
+  const size_t length = str.size();
+
+  return impl::as_wide_impl(data, length);
+}
+#endif
+
+// Installs the allocation/deallocation function pair stored in impl::xml_memory.
+PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
+{
+  impl::xml_memory::allocate = allocate;
+
+  impl::xml_memory::deallocate = deallocate;
+}
+
+// Returns the allocation function currently stored in impl::xml_memory.
+PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
+{
+  allocation_function current = impl::xml_memory::allocate;
+
+  return current;
+}
+
+// Returns the deallocation function currently stored in impl::xml_memory.
+PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
+{
+  deallocation_function current = impl::xml_memory::deallocate;
+
+  return current;
+}
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+/* Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier).
+ * Each _Iter_cat overload reports the corresponding pugixml iterator type as a
+ * bidirectional iterator.
+ */
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+/* Workarounds for (non-standard) iterator category detection; compiled only when
+ * __SUNPRO_CC is defined. Each __iterator_category overload reports the corresponding
+ * pugixml iterator type as a bidirectional iterator.
+ */
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+
+PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
+{
+  return std::bidirectional_iterator_tag();
+}
+}
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+// STL replacements
+PUGI__NS_BEGIN
+// Function object: compares two values of the same type with operator==.
+struct equal_to {
+  template <typename T>
+  bool operator()(const T& left, const T& right) const
+  {
+    return left == right;
+  }
+};
+
+// Function object: compares two values of the same type with operator!=.
+struct not_equal_to {
+  template <typename T>
+  bool operator()(const T& left, const T& right) const
+  {
+    return left != right;
+  }
+};
+
+// Function object: compares two values of the same type with operator<.
+struct less {
+  template <typename T>
+  bool operator()(const T& left, const T& right) const
+  {
+    return left < right;
+  }
+};
+
+// Function object: compares two values of the same type with operator<=.
+struct less_equal {
+  template <typename T>
+  bool operator()(const T& left, const T& right) const
+  {
+    return left <= right;
+  }
+};
+
+// Exchanges the values of lhs and rhs through a temporary copy.
+template <typename T> void swap(T& lhs, T& rhs)
+{
+  T saved(lhs);
+
+  lhs = rhs;
+  rhs = saved;
+}
+
+// Returns an iterator to the smallest element of [begin, end) according to pred.
+// When pred reports a later element as smaller than the current best, the later
+// element becomes the result. An empty range yields `end` (the original code
+// stepped past `end` and dereferenced out-of-range iterators in that case).
+template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
+{
+  // nothing to compare in an empty range
+  if (begin == end) return end;
+
+  I result = begin;
+
+  for (I it = begin + 1; it != end; ++it)
+    if (pred(*it, *result))
+      result = it;
+
+  return result;
+}
+
+// Reverses [begin, end) in place by swapping elements from both ends inwards.
+template <typename I> void reverse(I begin, I end)
+{
+  while (end - begin > 1) {
+    --end;
+    swap(*begin, *end);
+    ++begin;
+  }
+}
+
+// Collapses runs of consecutive equal elements in [begin, end) to a single element
+// and returns the new past-the-end iterator.
+template <typename I> I unique(I begin, I end)
+{
+  // fast skip head: nothing needs to move while neighbours differ
+  while (end - begin > 1 && *begin != *(begin + 1)) ++begin;
+
+  if (begin == end) return begin;
+
+  // last element kept so far
+  I kept = begin;
+
+  // copy down every element that differs from the last kept one
+  for (I read = begin + 1; read != end; ++read) {
+    if (*read != *kept) {
+      ++kept;
+      *kept = *read;
+    }
+  }
+
+  // past-the-end (kept points to live element)
+  return kept + 1;
+}
+
+// Copies [begin, end) so that the copy ends at `target`, walking from the last
+// element to the first.
+template <typename I> void copy_backwards(I begin, I end, I target)
+{
+  while (begin != end) {
+    --target;
+    --end;
+    *target = *end;
+  }
+}
+
+// Insertion sort of the non-empty range [begin, end) using pred as "less than".
+// The unnamed T* parameter only serves to deduce the element type.
+template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
+{
+  assert(begin != end);
+
+  for (I cur = begin + 1; cur != end; ++cur) {
+    T value = *cur;
+
+    if (pred(value, *begin)) {
+      // move to front: shift the whole sorted prefix right by one
+      copy_backwards(begin, cur, cur + 1);
+      *begin = value;
+      continue;
+    }
+
+    // pred(value, *begin) is false here, so the scan below stops at begin at the latest
+    I slot = cur;
+
+    for (; pred(value, *(slot - 1)); --slot)
+      *slot = *(slot - 1);
+
+    // fill hole with element
+    *slot = value;
+  }
+}
+
+/* std variant for elements with ==
+ *
+ * Three-way partition of [begin, end) around the pivot value found at *middle.
+ * On return out_eqbeg and out_eqend receive the bounds of the run of elements equal
+ * to the pivot; elements that compare before the pivot (per pred) have been moved
+ * in front of that run and elements that compare after it behind the run.
+ */
+template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
+{
+  I eqbeg = middle, eqend = middle + 1;
+
+  // expand equal range
+  while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
+  while (eqend != end && *eqend == *eqbeg) ++eqend;
+
+  // process outer elements
+  I ltend = eqbeg, gtbeg = eqend; // [ltend, eqbeg) and [eqend, gtbeg) are the regions already classified
+
+  for (;;) {
+    // find the element from the right side that belongs to the left one
+    for (; gtbeg != end; ++gtbeg)
+      if (!pred(*eqbeg, *gtbeg)) {
+        if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); // equal to pivot: absorb into the equal run
+        else break;
+      }
+
+    // find the element from the left side that belongs to the right one
+    for (; ltend != begin; --ltend)
+      if (!pred(*(ltend - 1), *eqbeg)) {
+        if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); // equal to pivot: absorb into the equal run
+        else break;
+      }
+
+    // scanned all elements
+    if (gtbeg == end && ltend == begin) {
+      *out_eqbeg = eqbeg;
+      *out_eqend = eqend;
+      return;
+    }
+
+    // make room for elements by moving equal area
+    if (gtbeg == end) { // only a misplaced left element remains: shift the equal run one slot to the left
+      if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
+      swap(*eqbeg, *--eqend);
+    } else if (ltend == begin) { // only a misplaced right element remains: shift the equal run one slot to the right
+      if (eqend != gtbeg) swap(*eqbeg, *eqend);
+      ++eqend;
+      swap(*gtbeg++, *eqbeg++);
+    } else swap(*gtbeg++, *--ltend); // one misplaced element on each side: exchange them
+  }
+}
+
+// Orders the three values at first/middle/last with three compare-and-swap steps,
+// which leaves their median at *middle.
+template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
+{
+  if (pred(*middle, *first)) {
+    swap(*middle, *first);
+  }
+
+  if (pred(*last, *middle)) {
+    swap(*last, *middle);
+  }
+
+  if (pred(*middle, *first)) {
+    swap(*middle, *first);
+  }
+}
+
+// Moves a pivot estimate to *middle: median of three when last - first is at
+// most 40, median of nine otherwise.
+template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
+{
+  // median of three for small chunks
+  if (last - first <= 40) {
+    median3(first, middle, last, pred);
+    return;
+  }
+
+  // median of nine: three local medians, then the median of those
+  size_t step = (last - first + 1) / 8;
+
+  median3(first, first + step, first + 2 * step, pred);
+  median3(middle - step, middle, middle + step, pred);
+  median3(last - 2 * step, last - step, last, pred);
+
+  median3(first + step, middle, last - step, pred);
+}
+
+// Sorts [begin, end) with pred as "less than": quicksort with three-way partitioning
+// while the range holds more than 32 elements, insertion sort for the remainder.
+template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
+{
+  // sort large chunks
+  while (end - begin > 32) {
+    // find median element
+    I middle = begin + (end - begin) / 2;
+    median(begin, middle, end - 1, pred);
+
+    // partition in three chunks (< = >)
+    I eqbeg, eqend;
+    partition(begin, middle, end, pred, &eqbeg, &eqend);
+
+    // loop on larger half, recurse into the smaller one
+    const bool left_is_larger = (eqbeg - begin > end - eqend);
+
+    if (left_is_larger) {
+      sort(eqend, end, pred);
+      end = eqbeg;
+    } else {
+      sort(begin, eqbeg, pred);
+      begin = eqend;
+    }
+  }
+
+  // insertion sort small chunk
+  if (begin == end) return;
+
+  insertion_sort(begin, end, pred, &*begin);
+}
+PUGI__NS_END
+
+// Allocator used for AST and evaluation stacks
+PUGI__NS_BEGIN
+static const size_t xpath_memory_page_size = // size in bytes of the data array of an xpath_memory_block
+#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
+  PUGIXML_MEMORY_XPATH_PAGE_SIZE // user-supplied override
+#else
+  4096 // default when no override is defined
+#endif
+  ;
+
+static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); // the larger of sizeof(double) and sizeof(void*); xpath_allocator rounds sizes up to a multiple of it
+
+struct xpath_memory_block { // one node of the singly linked block chain used by xpath_allocator
+  xpath_memory_block* next; // following block in the chain; 0 ends the chain
+  size_t capacity; // number of usable bytes in data
+
+  union {
+    char data[xpath_memory_page_size]; // payload; xpath_allocator may allocate blocks whose payload is larger than this nominal size
+    double alignment; // not read in the visible code; presumably present to force double alignment of data
+  };
+};
+
+class xpath_allocator
+{ /* Allocator that hands out consecutive chunks from the block at the head of a
+   * singly linked chain of xpath_memory_block objects and pushes a new block in
+   * front of the chain when the current one is exhausted. Individual allocations
+   * are never freed; whole blocks are released through revert() and release().
+   */
+  xpath_memory_block* _root; // block currently being filled (head of the chain)
+  size_t _root_size; // number of bytes already handed out from _root->data
+
+public:
+#ifdef PUGIXML_NO_EXCEPTIONS
+  jmp_buf* error_handler; // jump target used by allocate() when an allocation fails
+#endif
+
+  xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+    error_handler = 0;
+#endif
+  }
+
+  void* allocate_nothrow(size_t size) { // returns 0 when a new block is needed but cannot be obtained
+    // round size up to block alignment boundary
+    size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+
+    if (_root_size + size <= _root->capacity) { // fits into the current block: hand out the next free bytes
+      void* buf = &_root->data[0] + _root_size;
+      _root_size += size;
+      return buf;
+    } else {
+      // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
+      size_t block_capacity_base = sizeof(_root->data);
+      size_t block_capacity_req = size + block_capacity_base / 4;
+      size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
+
+      size_t block_size = block_capacity + offsetof(xpath_memory_block, data); // header fields plus payload
+
+      xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
+      if (!block) return 0;
+
+      block->next = _root; // push the new block in front of the chain
+      block->capacity = block_capacity;
+
+      _root = block;
+      _root_size = size;
+
+      return block->data;
+    }
+  }
+
+  void* allocate(size_t size) { // like allocate_nothrow, but failure is reported via longjmp or std::bad_alloc instead of a null result
+    void* result = allocate_nothrow(size);
+
+    if (!result) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+      assert(error_handler);
+      longjmp(*error_handler, 1);
+#else
+      throw std::bad_alloc();
+#endif
+    }
+
+    return result;
+  }
+
+  void* reallocate(void* ptr, size_t old_size, size_t new_size) { // resizes the most recent allocation (ptr may be 0 for a fresh allocation)
+    // round size up to block alignment boundary
+    old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+    new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
+
+    // we can only reallocate the last object
+    assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
+
+    // adjust root size so that we have not allocated the object at all
+    bool only_object = (_root_size == old_size);
+
+    if (ptr) _root_size -= old_size;
+
+    // allocate a new version (this will obviously reuse the memory if possible)
+    void* result = allocate(new_size);
+    assert(result);
+
+    // we have a new block
+    if (result != ptr && ptr) {
+      // copy old data
+      assert(new_size >= old_size);
+      memcpy(result, ptr, old_size);
+
+      // free the previous page if it had no other objects
+      if (only_object) {
+        assert(_root->data == result);
+        assert(_root->next);
+
+        xpath_memory_block* next = _root->next->next;
+
+        if (next) {
+          // deallocate the whole page, unless it was the first one
+          xml_memory::deallocate(_root->next);
+          _root->next = next;
+        }
+      }
+    }
+
+    return result;
+  }
+
+  void revert(const xpath_allocator& state) { // rolls the allocator back to a previously copied state
+    // free all new pages
+    xpath_memory_block* cur = _root;
+
+    while (cur != state._root) {
+      xpath_memory_block* next = cur->next;
+
+      xml_memory::deallocate(cur);
+
+      cur = next;
+    }
+
+    // restore state
+    _root = state._root;
+    _root_size = state._root_size;
+  }
+
+  void release() { // frees every block that has a successor; the last block of the chain is left untouched
+    xpath_memory_block* cur = _root;
+    assert(cur);
+
+    while (cur->next) {
+      xpath_memory_block* next = cur->next;
+
+      xml_memory::deallocate(cur);
+
+      cur = next;
+    }
+  }
+};
+
+struct xpath_allocator_capture { // copies an allocator's state on construction and reverts the allocator to it on destruction
+  xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) {
+  }
+
+  ~xpath_allocator_capture() {
+    _target->revert(_state); // frees blocks added since construction and restores the saved position
+  }
+
+  xpath_allocator* _target; // allocator being guarded
+  xpath_allocator _state; // copy of *_target taken at construction time
+};
+
+struct xpath_stack { // pair of allocator pointers; xpath_stack_data points them at its own two allocators
+  xpath_allocator* result; // NOTE(review): presumably for data that outlives the current evaluation step - confirm against callers
+  xpath_allocator* temp; // NOTE(review): presumably for scratch data - confirm against callers
+};
+
+struct xpath_stack_data { // owns two embedded memory blocks, one allocator on top of each, and the xpath_stack that refers to them
+  xpath_memory_block blocks[2]; // initial blocks: blocks[0] backs result, blocks[1] backs temp
+  xpath_allocator result;
+  xpath_allocator temp;
+  xpath_stack stack;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+  jmp_buf error_handler; // shared jump buffer both allocators point at
+#endif
+
+  xpath_stack_data(): result(blocks + 0), temp(blocks + 1) {
+    blocks[0].next = blocks[1].next = 0; // each embedded block is the tail of its chain
+    blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
+
+    stack.result = &result;
+    stack.temp = &temp;
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+    result.error_handler = temp.error_handler = &error_handler;
+#endif
+  }
+
+  ~xpath_stack_data() {
+    result.release(); // frees heap blocks only; release() stops at the chain tail, which is an embedded block
+    temp.release();
+  }
+};
+PUGI__NS_END
+
+// String class
+PUGI__NS_BEGIN
+class xpath_string
+{ /* String value that either refers to a buffer it does not own (_uses_heap == false)
+   * or to a buffer obtained from an xpath_allocator (_uses_heap == true). The length
+   * is cached only for allocator-backed strings.
+   */
+  const char_t* _buffer; // null-terminated contents; never null in the visible constructors
+  bool _uses_heap; // true when _buffer was obtained from an xpath_allocator
+  size_t _length_heap; // length in characters, meaningful only when _uses_heap is true
+
+  static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) { // copies length characters and appends a terminator
+    char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
+    assert(result);
+
+    memcpy(result, string, length * sizeof(char_t));
+    result[length] = 0;
+
+    return result;
+  }
+
+  xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) {
+  }
+
+public:
+  static xpath_string from_const(const char_t* str) { // wraps str without copying it
+    return xpath_string(str, false, 0);
+  }
+
+  static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) { // adopts an existing buffer; end must point at its terminator
+    assert(begin <= end && *end == 0);
+
+    return xpath_string(begin, true, static_cast<size_t>(end - begin));
+  }
+
+  static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) { // copies [begin, end) into allocator memory; an empty range yields the default empty string
+    assert(begin <= end);
+
+    size_t length = static_cast<size_t>(end - begin);
+
+    return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
+  }
+
+  xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) {
+  }
+
+  void append(const xpath_string& o, xpath_allocator* alloc) { // appends o to this string, allocating from alloc when a copy is needed
+    // skip empty sources
+    if (!*o._buffer) return;
+
+    // fast append for constant empty target and constant source
+    if (!*_buffer && !_uses_heap && !o._uses_heap) {
+      _buffer = o._buffer;
+    } else {
+      // need to make heap copy
+      size_t target_length = length();
+      size_t source_length = o.length();
+      size_t result_length = target_length + source_length;
+
+      // allocate new buffer
+      char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
+      assert(result);
+
+      // append first string to the new buffer in case there was no reallocation
+      if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
+
+      // append second string to the new buffer
+      memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
+      result[result_length] = 0;
+
+      // finalize
+      _buffer = result;
+      _uses_heap = true;
+      _length_heap = result_length;
+    }
+  }
+
+  const char_t* c_str() const {
+    return _buffer;
+  }
+
+  size_t length() const { // cached length for allocator-backed strings, strlength() scan otherwise
+    return _uses_heap ? _length_heap : strlength(_buffer);
+  }
+
+  char_t* data(xpath_allocator* alloc) { // returns a writable buffer, copying the contents into allocator memory first if necessary
+    // make private heap copy
+    if (!_uses_heap) {
+      size_t length_ = strlength(_buffer);
+
+      _buffer = duplicate_string(_buffer, length_, alloc);
+      _uses_heap = true;
+      _length_heap = length_;
+    }
+
+    return const_cast<char_t*>(_buffer);
+  }
+
+  bool empty() const {
+    return *_buffer == 0;
+  }
+
+  bool operator==(const xpath_string& o) const { // compares contents via strequal
+    return strequal(_buffer, o._buffer);
+  }
+
+  bool operator!=(const xpath_string& o) const {
+    return !strequal(_buffer, o._buffer);
+  }
+
+  bool uses_heap() const {
+    return _uses_heap;
+  }
+};
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+// Returns true when `string` begins with `pattern` (an empty pattern always matches).
+PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
+{
+  for (; *pattern; ++string, ++pattern)
+    if (*string != *pattern)
+      return false;
+
+  return true;
+}
+
+// Finds the first occurrence of character c in s using the C library routine
+// that matches the configured character type (wcschr or strchr).
+PUGI__FN const char_t* find_char(const char_t* s, char_t c)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  const char_t* found = wcschr(s, c);
+#else
+  const char_t* found = strchr(s, c);
+#endif
+
+  return found;
+}
+
+// Finds the first occurrence of substring p in s using the C library routine
+// that matches the configured character type (wcsstr or strstr).
+PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
+{
+#ifdef PUGIXML_WCHAR_MODE
+  // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
+  if (*p == 0) return s;
+
+  return wcsstr(s, p);
+#else
+  return strstr(s, p);
+#endif
+}
+
+// Converts symbol to lower case, if it is an ASCII one
+// Converts symbol to lower case, if it is an ASCII one
+PUGI__FN char_t tolower_ascii(char_t ch)
+{
+  // the unsigned difference is below 26 exactly for 'A'..'Z'
+  const bool is_ascii_upper = static_cast<unsigned int>(ch - 'A') < 26;
+
+  if (is_ascii_upper)
+    return static_cast<char_t>(ch | ' '); // setting the 0x20 bit maps 'A'..'Z' to 'a'..'z'
+
+  return ch;
+}
+
+PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) // computes the string value of an XPath node; alloc is used only when text has to be concatenated
+{
+  if (na.attribute())
+    return xpath_string::from_const(na.attribute().value()); // attribute: its value, referenced without copying
+  else {
+    xml_node n = na.node();
+
+    switch (n.type()) {
+    case node_pcdata:
+    case node_cdata:
+    case node_comment:
+    case node_pi:
+      return xpath_string::from_const(n.value()); // these node types: the node's own value, referenced without copying
+
+    case node_document:
+    case node_element: { // concatenation of all pcdata/cdata descendants, visited depth-first
+      xpath_string result;
+
+      xml_node cur = n.first_child();
+
+      while (cur && cur != n) {
+        if (cur.type() == node_pcdata || cur.type() == node_cdata)
+          result.append(xpath_string::from_const(cur.value()), alloc);
+
+        if (cur.first_child()) // descend first
+          cur = cur.first_child();
+        else if (cur.next_sibling()) // then move sideways
+          cur = cur.next_sibling();
+        else {
+          while (!cur.next_sibling() && cur != n) // climb until an ancestor has a next sibling or we are back at n
+            cur = cur.parent();
+
+          if (cur != n) cur = cur.next_sibling();
+        }
+      }
+
+      return result;
+    }
+
+    default:
+      return xpath_string(); // any other node type: empty string
+    }
+  }
+}
+
+// Decides whether ln precedes rn among the children of their common parent.
+// Both nodes must have the same parent pointer.
+PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
+{
+  assert(ln->parent == rn->parent);
+
+  // there is no common ancestor (the shared parent is null), nodes are from different documents
+  if (!ln->parent) return ln < rn;
+
+  // determine sibling order: walk forward from both nodes in lockstep until one
+  // walk reaches the other node or runs off the end of the sibling list
+  xml_node_struct* ls = ln;
+  xml_node_struct* rs = rn;
+
+  for (; ls && rs; ls = ls->next_sibling, rs = rs->next_sibling) {
+    if (ls == rn) return true;
+    if (rs == ln) return false;
+  }
+
+  // if rn sibling chain ended ln must be before rn
+  return !rs;
+}
+
+PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) // compares the positions of two nodes in the tree; true when ln comes before rn
+{
+  // find common ancestor at the same depth, if any
+  xml_node_struct* lp = ln;
+  xml_node_struct* rp = rn;
+
+  while (lp && rp && lp->parent != rp->parent) {
+    lp = lp->parent;
+    rp = rp->parent;
+  }
+
+  // parents are the same!
+  if (lp && rp) return node_is_before_sibling(lp, rp);
+
+  // nodes are at different depths, need to normalize heights
+  bool left_higher = !lp; // the left walk ran out of ancestors first, so ln is the shallower node
+
+  while (lp) { // lift ln by the number of levels it is deeper than rn
+    lp = lp->parent;
+    ln = ln->parent;
+  }
+
+  while (rp) { // lift rn by the number of levels it is deeper than ln
+    rp = rp->parent;
+    rn = rn->parent;
+  }
+
+  // one node is the ancestor of the other
+  if (ln == rn) return left_higher;
+
+  // find common ancestor... again
+  while (ln->parent != rn->parent) {
+    ln = ln->parent;
+    rn = rn->parent;
+  }
+
+  return node_is_before_sibling(ln, rn);
+}
+
+// Returns true when `parent` is non-null and is either `node` itself or one of
+// the nodes reached by following parent links from `node`.
+PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
+{
+  xml_node_struct* cur = node;
+
+  while (cur && cur != parent)
+    cur = cur->parent;
+
+  return parent && cur == parent;
+}
+
+PUGI__FN const void* document_buffer_order(const xpath_node& xnode) // returns a pointer usable as an ordering key for xnode, or 0 when none is available
+{
+  xml_node_struct* node = xnode.node().internal_object();
+
+  if (node) {
+    if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) { // only when the document's contents-shared flag is clear
+      if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; // name pointer with neither the allocated nor the shared flag set
+      if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; // otherwise the value pointer under the same condition
+    }
+
+    return 0;
+  }
+
+  xml_attribute_struct* attr = xnode.attribute().internal_object();
+
+  if (attr) {
+    if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) { // same checks as for nodes, without the null tests on name/value
+      if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
+      if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
+    }
+
+    return 0;
+  }
+
+  return 0;
+}
+
+// "Less than" predicate ordering xpath_node values by document order.
+// Uses the buffer-pointer key from document_buffer_order() when both operands have
+// one and falls back to walking the tree otherwise. An element is never reported as
+// ordered before itself.
+struct document_order_comparator {
+  bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+    // optimized document order based check
+    const void* lo = document_buffer_order(lhs);
+    const void* ro = document_buffer_order(rhs);
+
+    if (lo && ro) return lo < ro;
+
+    // slow comparison
+    xml_node ln = lhs.node(), rn = rhs.node();
+
+    // compare attributes
+    if (lhs.attribute() && rhs.attribute()) {
+      // shared parent
+      if (lhs.parent() == rhs.parent()) {
+        // determine sibling order: lhs precedes rhs only if rhs is found strictly
+        // after lhs. Starting the scan at lhs itself would make the predicate
+        // return true for identical attributes, unlike the node path below.
+        for (xml_attribute a = lhs.attribute().next_attribute(); a; a = a.next_attribute())
+          if (a == rhs.attribute())
+            return true;
+
+        return false;
+      }
+
+      // compare attribute parents
+      ln = lhs.parent();
+      rn = rhs.parent();
+    } else if (lhs.attribute()) {
+      // attributes go after the parent element
+      if (lhs.parent() == rhs.node()) return false;
+
+      ln = lhs.parent();
+    } else if (rhs.attribute()) {
+      // attributes go after the parent element
+      if (rhs.parent() == lhs.node()) return true;
+
+      rn = rhs.parent();
+    }
+
+    // identical nodes are never ordered before each other
+    if (ln == rn) return false;
+
+    // an empty handle on either side: fall back to comparing the handles directly
+    if (!ln || !rn) return ln < rn;
+
+    return node_is_before(ln.internal_object(), rn.internal_object());
+  }
+};
+
+// Ordering used to group identical xpath_node values next to each other: attribute nodes sort before
+// non-attribute nodes, attributes are ordered by xml_attribute::operator<, other nodes by xml_node::operator<.
+struct duplicate_comparator {
+  bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
+    if (lhs.attribute()) {
+      // attribute vs attribute: compare handles; attribute vs node: attribute first
+      if (rhs.attribute()) return lhs.attribute() < rhs.attribute();
+
+      return true;
+    }
+
+    // node vs attribute: node goes last
+    if (rhs.attribute()) return false;
+
+    return lhs.node() < rhs.node();
+  }
+};
+
+// Returns a NaN value as a double.
+PUGI__FN double gen_nan()
+{
+#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
+  // float has the expected binary layout: store the bit pattern 0x7fc00000 and read it back as a float.
+  // The array bound evaluates to -1 (a compile error) if float and uint32_t differ in size.
+  union {
+    float f;
+    uint32_t i;
+  } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
+  u[0].i = 0x7fc00000;
+  return u[0].f;
+#else
+  // fallback: compute 0/0 at run time; 'volatile' keeps the division from being evaluated at compile time
+  const volatile double zero = 0.0;
+  return zero / zero;
+#endif
+}
+
+// Returns true when 'value' is NaN, using whichever classification facility the toolchain provides.
+PUGI__FN bool is_nan(double value)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+  // _isnan returns an int; '!!' converts it to bool
+  return !!_isnan(value);
+#elif defined(fpclassify) && defined(FP_NAN)
+  return fpclassify(value) == FP_NAN;
+#else
+  // fallback: NaN is the only value that compares unequal to itself;
+  // the volatile copy forces the comparison to actually be performed
+  const volatile double v = value;
+  return v != v;
+#endif
+}
+
+// Returns the fixed string for values that are not formatted digit by digit:
+// "0" for zero, "NaN" for NaN, "Infinity"/"-Infinity" for infinities.
+// Returns 0 (null) for every other value, meaning the caller has to format the number itself.
+PUGI__FN const char_t* convert_number_to_string_special(double value)
+{
+#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
+  // finite values: only zero is special
+  if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
+  if (_isnan(value)) return PUGIXML_TEXT("NaN");
+  return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
+  switch (fpclassify(value)) {
+  case FP_NAN:
+    return PUGIXML_TEXT("NaN");
+
+  case FP_INFINITE:
+    return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+
+  case FP_ZERO:
+    return PUGIXML_TEXT("0");
+
+  default:
+    return 0;
+  }
+#else
+  // fallback
+  const volatile double v = value;
+
+  if (v == 0) return PUGIXML_TEXT("0");
+  if (v != v) return PUGIXML_TEXT("NaN");
+  // zero was handled above, so a value equal to its own double is treated as an infinity
+  if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
+  return 0;
+#endif
+}
+
+// Converts a number to a boolean: zero and NaN give false, everything else gives true.
+PUGI__FN bool convert_number_to_boolean(double value)
+{
+  if (value == 0) return false;
+
+  return !is_nan(value);
+}
+
+// Drops trailing '0' characters from the range [begin, end) by writing a terminator after the last
+// character that is kept (at 'begin' if every character is '0', at 'end' if none is).
+PUGI__FN void truncate_zeros(char* begin, char* end)
+{
+  char* stop = end;
+
+  while (stop != begin && *(stop - 1) == '0') --stop;
+
+  *stop = 0;
+}
+
+// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
+// Parameters (both variants):
+//   value        - number to decompose
+//   buffer       - scratch storage of buffer_size chars that receives the digit string
+//   out_mantissa - receives a pointer into 'buffer' to the zero-terminated digits (trailing zeros removed)
+//   out_exponent - receives the decimal exponent that goes with the implied "0." prefix
+#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+{
+  // get base values
+  // ('sign' is filled in by _ecvt_s but not used afterwards in this function)
+  int sign, exponent;
+  _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
+
+  // truncate redundant zeros
+  truncate_zeros(buffer, buffer + strlen(buffer));
+
+  // fill results
+  *out_mantissa = buffer;
+  *out_exponent = exponent;
+}
+#else
+PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
+{
+  // get a scientific notation value with IEEE DBL_DIG decimals
+  sprintf(buffer, "%.*e", DBL_DIG, value);
+  // the size check happens after the write and only in builds where assert is active
+  assert(strlen(buffer) < buffer_size);
+  // keeps buffer_size "used" when the assert above compiles to nothing
+  (void)!buffer_size;
+
+  // get the exponent (possibly negative)
+  char* exponent_string = strchr(buffer, 'e');
+  assert(exponent_string);
+
+  int exponent = atoi(exponent_string + 1);
+
+  // extract mantissa string: skip sign
+  char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
+  assert(mantissa[0] != '0' && mantissa[1] == '.');
+
+  // divide mantissa by 10 to eliminate integer part
+  // (the leading digit overwrites the '.', so "d.ddd" becomes "dddd" starting one char later)
+  mantissa[1] = mantissa[0];
+  mantissa++;
+  exponent++;
+
+  // remove extra mantissa digits and zero-terminate mantissa
+  // (the digits end where the 'e' of the exponent starts)
+  truncate_zeros(mantissa, exponent_string);
+
+  // fill results
+  *out_mantissa = mantissa;
+  *out_exponent = exponent;
+}
+#endif
+
+// Formats 'value' as a string. Zero, NaN and infinities use the fixed strings returned by
+// convert_number_to_string_special; all other values are written in plain decimal notation
+// (optional '-', integer digits, optional '.' plus fraction digits) into memory taken from 'alloc'.
+PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
+{
+  // fixed spellings first
+  if (const char_t* fixed = convert_number_to_string_special(value))
+    return xpath_string::from_const(fixed);
+
+  // split the value into decimal digits and a decimal exponent
+  char digits_storage[32];
+
+  char* digits;
+  int exponent;
+  convert_number_to_mantissa_exponent(value, digits_storage, sizeof(digits_storage), &digits, &exponent);
+
+  // size the output: digit string length, |exponent| padding zeros, and 4 spare characters
+  size_t capacity = strlen(digits_storage) + (exponent > 0 ? exponent : -exponent) + 4;
+  char_t* text = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * capacity));
+  assert(text);
+
+  char_t* out = text;
+
+  // sign
+  if (value < 0) *out++ = '-';
+
+  // integer part: a single zero, or the first 'exponent' digits padded with zeros once digits run out
+  if (exponent > 0) {
+    for (; exponent > 0; --exponent) {
+      assert(*digits == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*digits) - '0') <= 9);
+
+      if (*digits)
+        *out++ = *digits++;
+      else
+        *out++ = '0';
+    }
+  } else {
+    *out++ = '0';
+  }
+
+  // fractional part, only when digits are left over
+  if (*digits) {
+    *out++ = '.';
+
+    // zeros between the decimal point and the first digit (negative exponent)
+    for (; exponent < 0; ++exponent)
+      *out++ = '0';
+
+    // remaining digits
+    for (; *digits; ++digits) {
+      assert(static_cast<unsigned int>(*digits - '0') <= 9);
+      *out++ = *digits;
+    }
+  }
+
+  // terminate
+  assert(out < text + capacity);
+  *out = 0;
+
+  return xpath_string::from_heap_preallocated(text, out);
+}
+
+// Checks that 'string' has the shape: optional whitespace, optional '-', digits with an optional
+// '.' and more digits (or '.' followed by at least one digit), optional whitespace, end of string.
+PUGI__FN bool check_string_to_number_format(const char_t* string)
+{
+  const char_t* p = string;
+
+  // leading whitespace
+  for (; PUGI__IS_CHARTYPE(*p, ct_space); ++p) {
+  }
+
+  // optional minus sign
+  if (*p == '-') ++p;
+
+  // nothing after whitespace/sign
+  if (*p == 0) return false;
+
+  // a number starts either with a digit or with '.' immediately followed by a digit
+  const bool starts_with_digit = PUGI__IS_CHARTYPEX(p[0], ctx_digit) != 0;
+  const bool starts_with_fraction = p[0] == '.' && PUGI__IS_CHARTYPEX(p[1], ctx_digit);
+
+  if (!starts_with_digit && !starts_with_fraction) return false;
+
+  // integer digits
+  for (; PUGI__IS_CHARTYPEX(*p, ctx_digit); ++p) {
+  }
+
+  // optional decimal point and fraction digits
+  if (*p == '.') {
+    ++p;
+
+    for (; PUGI__IS_CHARTYPEX(*p, ctx_digit); ++p) {
+    }
+  }
+
+  // trailing whitespace
+  for (; PUGI__IS_CHARTYPE(*p, ct_space); ++p) {
+  }
+
+  // valid only if the whole string was consumed
+  return *p == 0;
+}
+
+// Converts a zero-terminated string to a number; strings rejected by
+// check_string_to_number_format produce NaN.
+PUGI__FN double convert_string_to_number(const char_t* string)
+{
+  if (check_string_to_number_format(string)) {
+    // format verified, let the C runtime do the conversion
+#ifdef PUGIXML_WCHAR_MODE
+    return wcstod(string, 0);
+#else
+    return strtod(string, 0);
+#endif
+  }
+
+  return gen_nan();
+}
+
+// Converts the character range [begin, end) to a number, stored in *out_result.
+// The range is first copied into a zero-terminated buffer: the caller's 32-element 'buffer' when it fits,
+// otherwise a temporary block from xml_memory. Returns false only when that temporary allocation fails.
+PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
+{
+  const size_t length = static_cast<size_t>(end - begin);
+  const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+
+  // the range plus its terminator must fit, hence '>='
+  const bool needs_heap = length >= buffer_capacity;
+
+  char_t* scratch = buffer;
+
+  if (needs_heap) {
+    scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+
+    if (!scratch) return false;
+  }
+
+  // build the zero-terminated copy
+  memcpy(scratch, begin, length * sizeof(char_t));
+  scratch[length] = 0;
+
+  *out_result = convert_string_to_number(scratch);
+
+  // release the temporary block, if one was used
+  if (needs_heap) xml_memory::deallocate(scratch);
+
+  return true;
+}
+
+// Rounds to the nearest integer value by taking floor(value + 0.5).
+PUGI__FN double round_nearest(double value)
+{
+  const double shifted = value + 0.5;
+
+  return floor(shifted);
+}
+
+// Same as round_nearest, except that inputs in [-0.5, 0] are rounded with ceil so that
+// negative inputs in that range produce -0 while +0 stays +0.
+PUGI__FN double round_nearest_nzero(double value)
+{
+  if (value >= -0.5 && value <= 0)
+    return ceil(value);
+
+  return floor(value + 0.5);
+}
+
+// Returns the name of the attribute when 'node' holds an attribute, otherwise the name of its node.
+PUGI__FN const char_t* qualified_name(const xpath_node& node)
+{
+  if (node.attribute())
+    return node.attribute().name();
+
+  return node.node().name();
+}
+
+// Returns the part of the qualified name after the first ':', or the whole name when there is no ':'.
+PUGI__FN const char_t* local_name(const xpath_node& node)
+{
+  const char_t* full = qualified_name(node);
+
+  if (const char_t* colon = find_char(full, ':'))
+    return colon + 1;
+
+  return full;
+}
+
+// Attribute predicate that recognizes the namespace declaration matching a given element/attribute name.
+// Constructed (implicitly) from a qualified name: 'prefix' points at the name and 'prefix_length' is the
+// number of characters before the first ':'; both are zero when the name has no ':'.
+struct namespace_uri_predicate {
+  const char_t* prefix;
+  size_t prefix_length;
+
+  namespace_uri_predicate(const char_t* name): prefix(0), prefix_length(0) {
+    if (const char_t* colon = find_char(name, ':')) {
+      prefix = name;
+      prefix_length = static_cast<size_t>(colon - name);
+    }
+  }
+
+  // true for "xmlns:<prefix>" when a prefix is set, and for exactly "xmlns" otherwise
+  bool operator()(xml_attribute a) const {
+    const char_t* attr_name = a.name();
+
+    if (!starts_with(attr_name, PUGIXML_TEXT("xmlns"))) return false;
+
+    // no prefix: only the bare "xmlns" attribute matches
+    if (!prefix) return attr_name[5] == 0;
+
+    return attr_name[5] == ':' && strequalrange(attr_name + 6, prefix, prefix_length);
+  }
+};
+
+// Looks up the namespace URI for 'node': walks from the node up through its ancestors and returns the
+// value of the first attribute accepted by namespace_uri_predicate; returns "" when none is found.
+PUGI__FN const char_t* namespace_uri(xml_node node)
+{
+  namespace_uri_predicate pred = node.name();
+
+  for (xml_node scope = node; scope; scope = scope.parent()) {
+    xml_attribute decl = scope.find_attribute(pred);
+
+    if (decl) return decl.value();
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Looks up the namespace URI for attribute 'attr' owned by 'parent': walks from 'parent' up through its
+// ancestors and returns the value of the first matching declaration; returns "" when the attribute name
+// has no prefix or no declaration is found.
+PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
+{
+  namespace_uri_predicate pred = attr.name();
+
+  // Default namespace does not apply to attributes
+  if (pred.prefix) {
+    for (xml_node scope = parent; scope; scope = scope.parent()) {
+      xml_attribute decl = scope.find_attribute(pred);
+
+      if (decl) return decl.value();
+    }
+  }
+
+  return PUGIXML_TEXT("");
+}
+
+// Dispatches to the attribute or node overload depending on what 'node' holds.
+PUGI__FN const char_t* namespace_uri(const xpath_node& node)
+{
+  if (node.attribute())
+    return namespace_uri(node.attribute(), node.parent());
+
+  return namespace_uri(node.node());
+}
+
+// Normalizes whitespace in 'buffer' in place: leading and trailing whitespace is removed and every inner
+// run of whitespace becomes a single ' '. Returns a pointer to the new zero terminator.
+PUGI__FN char_t* normalize_space(char_t* buffer)
+{
+  char_t* out = buffer;
+  char_t* in = buffer;
+
+  while (*in) {
+    char_t ch = *in++;
+
+    if (!PUGI__IS_CHARTYPE(ch, ct_space)) {
+      *out++ = ch;
+      continue;
+    }
+
+    // swallow the rest of the whitespace run
+    while (PUGI__IS_CHARTYPE(*in, ct_space)) in++;
+
+    // a run at the very start produces nothing; any other run produces one space
+    if (out != buffer) *out++ = ' ';
+  }
+
+  // a run at the very end left one space behind: drop it
+  if (out != buffer && PUGI__IS_CHARTYPE(out[-1], ct_space)) out--;
+
+  *out = 0;
+
+  return out;
+}
+
+// Rewrites 'buffer' in place using a character mapping:
+//  - a character that does not occur in 'from' is kept as is;
+//  - a character found in 'from' at an index below 'to_length' is replaced by to[index];
+//  - a character found in 'from' at an index of 'to_length' or more is dropped.
+// The first occurrence in 'from' decides (find_char result). Returns a pointer to the new zero terminator.
+PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
+{
+  char_t* write = buffer;
+
+  while (*buffer) {
+    // NOTE(review): PUGI__DMC_VOLATILE is defined outside this excerpt; presumably a compiler workaround - confirm
+    PUGI__DMC_VOLATILE char_t ch = *buffer++;
+
+    const char_t* pos = find_char(from, ch);
+
+    if (!pos)
+      *write++ = ch; // do not process
+    else if (static_cast<size_t>(pos - from) < to_length)
+      *write++ = to[pos - from]; // replace
+    // otherwise: no replacement available, the character is removed
+  }
+
+  // zero-terminate
+  *write = 0;
+
+  return write;
+}
+
+// Builds a 128-entry lookup table equivalent to translating with 'from'/'to' for characters below 128.
+// Entry value 128 means "remove this character"; any other value is the replacement character.
+// Returns 0 when either string contains a character code of 128 or above, or when allocation fails;
+// otherwise returns the table, allocated from 'alloc'.
+PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
+{
+  unsigned char table[128] = {0};
+
+  for (; *from; ++from) {
+    const unsigned int source_code = static_cast<unsigned int>(*from);
+    const unsigned int target_code = static_cast<unsigned int>(*to);
+
+    // only codes below 128 can be represented in the table
+    if (source_code >= 128 || target_code >= 128)
+      return 0;
+
+    // the first mapping for a character wins; an exhausted 'to' string maps to the skip marker
+    if (table[source_code] == 0)
+      table[source_code] = static_cast<unsigned char>(target_code != 0 ? target_code : 128);
+
+    // 'to' stops advancing once its terminator is reached
+    if (target_code != 0) ++to;
+  }
+
+  // characters without a mapping translate to themselves
+  for (int code = 0; code < 128; ++code)
+    if (table[code] == 0)
+      table[code] = static_cast<unsigned char>(code);
+
+  void* storage = alloc->allocate_nothrow(sizeof(table));
+
+  if (storage)
+    memcpy(storage, table, sizeof(table));
+
+  return static_cast<unsigned char*>(storage);
+}
+
+// Rewrites 'buffer' in place using a 128-entry table (see translate_table_generate for the entry format).
+// Characters with codes of 128 or above are copied unchanged. Returns a pointer to the new zero terminator.
+PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
+{
+  char_t* out = buffer;
+
+  for (const char_t* in = buffer; *in; ) {
+    const char_t ch = *in++;
+    const unsigned int code_point = static_cast<unsigned int>(ch);
+
+    if (code_point >= 128) {
+      // outside the table: keep as is
+      *out++ = ch;
+      continue;
+    }
+
+    const unsigned char mapped = table[code_point];
+
+    // entry 128 means "skip character"; bit 7 is set only for that entry, so the output pointer
+    // advances by 1 for a normal replacement and by 0 for a skipped one, without branching
+    *out = static_cast<char_t>(mapped);
+    out += 1 - (mapped >> 7);
+  }
+
+  *out = 0;
+
+  return out;
+}
+
+// Returns false for namespace declarations (a name that is exactly "xmlns" or starts with "xmlns:")
+// and true for every other attribute name.
+inline bool is_xpath_attribute(const char_t* name)
+{
+  if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return true;
+
+  // starts with "xmlns": it is a declaration only if the name ends here or continues with ':'
+  return name[5] != 0 && name[5] != ':';
+}
+
+// XPath variable holding a boolean; 'value' starts out false.
+// 'name' is declared with a single element: new_xpath_variable() allocates extra room directly after the
+// object and copies the full zero-terminated name into it, so 'name' has to remain the last member.
+struct xpath_variable_boolean: xpath_variable {
+  xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) {
+  }
+
+  bool value;
+  char_t name[1];
+};
+
+// XPath variable holding a number; 'value' starts out as 0.
+// 'name' is declared with a single element: new_xpath_variable() allocates extra room directly after the
+// object and copies the full zero-terminated name into it, so 'name' has to remain the last member.
+struct xpath_variable_number: xpath_variable {
+  xpath_variable_number(): xpath_variable(xpath_type_number), value(0) {
+  }
+
+  double value;
+  char_t name[1];
+};
+
+// XPath variable holding a string; 'value' starts out null and, when set, is released with
+// xml_memory::deallocate in the destructor.
+// 'name' is declared with a single element: new_xpath_variable() allocates extra room directly after the
+// object and copies the full zero-terminated name into it, so 'name' has to remain the last member.
+struct xpath_variable_string: xpath_variable {
+  xpath_variable_string(): xpath_variable(xpath_type_string), value(0) {
+  }
+
+  ~xpath_variable_string() {
+    if (value) xml_memory::deallocate(value);
+  }
+
+  char_t* value;
+  char_t name[1];
+};
+
+// XPath variable holding a node set; 'value' is default-constructed.
+// 'name' is declared with a single element: new_xpath_variable() allocates extra room directly after the
+// object and copies the full zero-terminated name into it, so 'name' has to remain the last member.
+struct xpath_variable_node_set: xpath_variable {
+  xpath_variable_node_set(): xpath_variable(xpath_type_node_set) {
+  }
+
+  xpath_node_set value;
+  char_t name[1];
+};
+
+// Default-constructed node set with internal linkage.
+// NOTE(review): presumably handed out where a node-set reference is needed but no real set exists - confirm against users.
+static const xpath_node_set dummy_node_set;
+
+// Hashes a zero-terminated string with the Jenkins one-at-a-time function
+// (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time).
+PUGI__FN unsigned int hash_string(const char_t* str)
+{
+  unsigned int hash = 0;
+
+  // mixing step, once per character
+  for (const char_t* p = str; *p; ++p) {
+    hash += static_cast<unsigned int>(*p);
+    hash += hash << 10;
+    hash ^= hash >> 6;
+  }
+
+  // final avalanche
+  hash += hash << 3;
+  hash ^= hash >> 11;
+  hash += hash << 15;
+
+  return hash;
+}
+
+// Creates a variable object of type T whose 'name' member holds a copy of 'name'.
+// Returns 0 for an empty name or when memory cannot be obtained from xml_memory.
+template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
+{
+  const size_t name_length = strlength(name);
+
+  // empty variable names are invalid
+  if (name_length == 0) return 0;
+
+  // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
+  // (T::name already provides one element, which accounts for the terminator)
+  const size_t total_size = sizeof(T) + name_length * sizeof(char_t);
+
+  void* storage = xml_memory::allocate(total_size);
+  if (!storage) return 0;
+
+  // construct in place, then copy the name including its terminator
+  T* variable = new (storage) T();
+
+  memcpy(variable->name, name, (name_length + 1) * sizeof(char_t));
+
+  return variable;
+}
+
+// Creates a variable of the concrete class matching 'type' with the given name.
+// Returns 0 for an unrecognized type or when the typed factory returns 0.
+PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
+{
+  if (type == xpath_type_node_set)
+    return new_xpath_variable<xpath_variable_node_set>(name);
+
+  if (type == xpath_type_number)
+    return new_xpath_variable<xpath_variable_number>(name);
+
+  if (type == xpath_type_string)
+    return new_xpath_variable<xpath_variable_string>(name);
+
+  if (type == xpath_type_boolean)
+    return new_xpath_variable<xpath_variable_boolean>(name);
+
+  return 0;
+}
+
+// Destroys a variable of concrete type T: runs its destructor explicitly, then returns the memory
+// to xml_memory (the counterpart of the allocate + placement-new pair in new_xpath_variable<T>).
+template <typename T> PUGI__FN void delete_xpath_variable(T* var)
+{
+  var->~T();
+  xml_memory::deallocate(var);
+}
+
+// Destroys 'var' as the concrete class that corresponds to 'type'.
+// An unrecognized type triggers an assertion and leaves 'var' untouched.
+PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
+{
+  if (type == xpath_type_node_set) {
+    delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
+  } else if (type == xpath_type_number) {
+    delete_xpath_variable(static_cast<xpath_variable_number*>(var));
+  } else if (type == xpath_type_string) {
+    delete_xpath_variable(static_cast<xpath_variable_string*>(var));
+  } else if (type == xpath_type_boolean) {
+    delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
+  } else {
+    assert(!"Invalid variable type");
+  }
+}
+
+// Assigns the value stored in 'rhs' to 'lhs' through lhs->set(), picking the value member by rhs's type.
+// Returns the result of set(); an unrecognized type triggers an assertion and returns false.
+PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
+{
+  const xpath_value_type source_type = rhs->type();
+
+  if (source_type == xpath_type_node_set)
+    return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
+
+  if (source_type == xpath_type_number)
+    return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
+
+  if (source_type == xpath_type_string)
+    return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
+
+  if (source_type == xpath_type_boolean)
+    return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
+
+  assert(!"Invalid variable type");
+  return false;
+}
+
+// Looks up the variable named by the character range [begin, end) in 'set' and stores the result of
+// set->get() in *out_result. The name is first copied into a zero-terminated buffer: the caller's
+// 32-element 'buffer' when it fits, otherwise a temporary block from xml_memory.
+// Returns false only when that temporary allocation fails.
+PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
+{
+  const size_t length = static_cast<size_t>(end - begin);
+  const size_t buffer_capacity = sizeof(buffer) / sizeof(buffer[0]);
+
+  // the name plus its terminator must fit, hence '>='
+  const bool needs_heap = length >= buffer_capacity;
+
+  char_t* scratch = buffer;
+
+  if (needs_heap) {
+    scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
+
+    if (!scratch) return false;
+  }
+
+  // build the zero-terminated copy
+  memcpy(scratch, begin, length * sizeof(char_t));
+  scratch[length] = 0;
+
+  *out_result = set->get(scratch);
+
+  // release the temporary block, if one was used
+  if (needs_heap) xml_memory::deallocate(scratch);
+
+  return true;
+}
+PUGI__NS_END
+
+// Internal node set class
+PUGI__NS_BEGIN
+// Classifies the range [begin, end): type_sorted when every adjacent pair compares as "before" in
+// document order, type_sorted_reverse when none does, type_unsorted when the pairs disagree.
+// Ranges with fewer than two elements are reported as type_sorted.
+PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
+{
+  if (end - begin < 2)
+    return xpath_node_set::type_sorted;
+
+  const size_t count = static_cast<size_t>(end - begin);
+
+  document_order_comparator before;
+
+  // the first pair sets the expected direction
+  const bool ascending = before(begin[0], begin[1]);
+
+  // every following adjacent pair has to agree with it
+  for (size_t i = 1; i + 1 < count; ++i)
+    if (before(begin[i], begin[i + 1]) != ascending)
+      return xpath_node_set::type_unsorted;
+
+  if (ascending)
+    return xpath_node_set::type_sorted;
+
+  return xpath_node_set::type_sorted_reverse;
+}
+
+// Puts [begin, end) into document order (rev == false) or reverse document order (rev == true).
+// 'type' is the ordering the range is currently known to have; the resulting ordering is returned.
+PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
+{
+  xpath_node_set::type_t current = type;
+
+  if (current == xpath_node_set::type_unsorted) {
+    // the range may already be ordered one way or the other; only sort when it is not
+    current = xpath_get_order(begin, end);
+
+    if (current == xpath_node_set::type_unsorted) {
+      sort(begin, end, document_order_comparator());
+
+      current = xpath_node_set::type_sorted;
+    }
+  }
+
+  const xpath_node_set::type_t wanted = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+  // ordered in the opposite direction: flip it
+  if (current != wanted) reverse(begin, end);
+
+  return wanted;
+}
+
+// Returns the element of [begin, end) that comes first in document order, given the ordering 'type' of
+// the range. Returns a default-constructed xpath_node for an empty range or an unrecognized type.
+PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
+{
+  if (begin == end) return xpath_node();
+
+  // document order: first element
+  if (type == xpath_node_set::type_sorted)
+    return *begin;
+
+  // reverse document order: last element
+  if (type == xpath_node_set::type_sorted_reverse)
+    return *(end - 1);
+
+  // no known order: search for the minimum
+  if (type == xpath_node_set::type_unsorted)
+    return *min_element(begin, end, document_order_comparator());
+
+  assert(!"Invalid node set type");
+  return xpath_node();
+}
+
+// Growable array of xpath_node values whose storage comes from an xpath_allocator, together with
+// a tag describing the current ordering of its elements. Starts empty and tagged type_unsorted.
+class xpath_node_set_raw
+{
+  xpath_node_set::type_t _type;
+
+  // [_begin, _end) holds the elements, [_begin, _eos) is the allocated storage
+  xpath_node* _begin;
+  xpath_node* _end;
+  xpath_node* _eos;
+
+public:
+  xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) {
+  }
+
+  xpath_node* begin() const {
+    return _begin;
+  }
+
+  xpath_node* end() const {
+    return _end;
+  }
+
+  bool empty() const {
+    return _end == _begin;
+  }
+
+  size_t size() const {
+    return static_cast<size_t>(_end - _begin);
+  }
+
+  // element that comes first in document order (see xpath_first)
+  xpath_node first() const {
+    return xpath_first(_begin, _end, _type);
+  }
+
+  // out-of-line slow path of push_back: enlarges the storage, then appends
+  void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
+
+  // appends one element, growing the storage when it is full
+  void push_back(const xpath_node& node, xpath_allocator* alloc) {
+    if (_end == _eos) {
+      push_back_grow(node, alloc);
+      return;
+    }
+
+    *_end = node;
+    ++_end;
+  }
+
+  // appends the range [begin_, end_); storage is enlarged to exactly the required size when needed
+  void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) {
+    const size_t incoming = static_cast<size_t>(end_ - begin_);
+
+    if (incoming == 0) return;
+
+    const size_t used = static_cast<size_t>(_end - _begin);
+    const size_t room = static_cast<size_t>(_eos - _begin);
+    const size_t required = used + incoming;
+
+    if (required > room) {
+      // reallocate the old array or allocate a new one
+      xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, room * sizeof(xpath_node), required * sizeof(xpath_node)));
+      assert(data);
+
+      _begin = data;
+      _end = data + used;
+      _eos = data + required;
+    }
+
+    memcpy(_end, begin_, incoming * sizeof(xpath_node));
+    _end += incoming;
+  }
+
+  // brings the elements into document order and updates the ordering tag
+  void sort_do() {
+    _type = xpath_sort(_begin, _end, _type, false);
+  }
+
+  // drops every element from 'pos' onward; 'pos' must lie within [_begin, _end]
+  void truncate(xpath_node* pos) {
+    assert(_begin <= pos && pos <= _end);
+
+    _end = pos;
+  }
+
+  // removes adjacent equal elements; an unsorted set is first sorted with duplicate_comparator
+  // so that equal elements end up next to each other (the ordering tag is not changed here)
+  void remove_duplicates() {
+    if (_type == xpath_node_set::type_unsorted) {
+      sort(_begin, _end, duplicate_comparator());
+    }
+
+    _end = unique(_begin, _end);
+  }
+
+  xpath_node_set::type_t type() const {
+    return _type;
+  }
+
+  void set_type(xpath_node_set::type_t value) {
+    _type = value;
+  }
+};
+
+// Slow path of push_back: enlarges the storage to 1.5x the old capacity plus one, then appends 'node'.
+// The element count after growing is taken to be the old capacity (push_back calls this only when full).
+PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
+{
+  const size_t old_capacity = static_cast<size_t>(_eos - _begin);
+
+  // 1.5x rule; the +1 makes an empty set grow to one element
+  const size_t grown_capacity = old_capacity + old_capacity / 2 + 1;
+
+  // reallocate the old array or allocate a new one
+  xpath_node* storage = static_cast<xpath_node*>(alloc->reallocate(_begin, old_capacity * sizeof(xpath_node), grown_capacity * sizeof(xpath_node)));
+  assert(storage);
+
+  _begin = storage;
+  _eos = storage + grown_capacity;
+
+  // the new element goes right after the old contents
+  storage[old_capacity] = node;
+  _end = storage + old_capacity + 1;
+}
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+// Evaluation context passed to the AST evaluation functions: a node plus a position and a size value.
+struct xpath_context {
+  xpath_node n;
+  size_t position;
+  size_t size;
+
+  xpath_context(const xpath_node& n_, size_t position_, size_t size_)
+    : n(n_), position(position_), size(size_) {
+  }
+};
+
+// Token kinds produced by xpath_lexer::next(); the trailing comments show the input that yields each one.
+enum lexeme_t {
+  lex_none = 0,           // input that does not form a token (e.g. lone '!' or ':', unterminated quote)
+  lex_equal,              // =
+  lex_not_equal,          // !=
+  lex_less,               // <
+  lex_greater,            // >
+  lex_less_or_equal,      // <=
+  lex_greater_or_equal,   // >=
+  lex_plus,               // +
+  lex_minus,              // -
+  lex_multiply,           // *
+  lex_union,              // |
+  lex_var_ref,            // $name or $prefix:name (contents exclude the '$')
+  lex_open_brace,         // (
+  lex_close_brace,        // )
+  lex_quoted_string,      // "..." or '...' (contents exclude the quotes)
+  lex_number,             // digits with optional fraction, or '.' followed by digits
+  lex_slash,              // /
+  lex_double_slash,       // //
+  lex_open_square_brace,  // [
+  lex_close_square_brace, // ]
+  lex_string,             // name, prefix:name or prefix:*
+  lex_comma,              // ,
+  lex_axis_attribute,     // @
+  lex_dot,                // .
+  lex_double_dot,         // ..
+  lex_double_colon,       // ::
+  lex_eof                 // end of input
+};
+
+// Non-owning view of a character range [begin, end); both pointers start out null.
+struct xpath_lexer_string {
+  const char_t* begin;
+  const char_t* end;
+
+  xpath_lexer_string(): begin(0), end(0) {
+  }
+
+  // compares the range against a zero-terminated string via strequalrange
+  bool operator==(const char_t* other) const {
+    return strequalrange(other, begin, static_cast<size_t>(end - begin));
+  }
+};
+
+// Tokenizer for a zero-terminated query string.
+// The constructor reads the first token; next() advances to the following one. current() gives the kind of
+// the token last read, current_pos() its start in the query, and contents() its text for the token kinds
+// that carry text (variable reference, number, name, quoted string).
+class xpath_lexer
+{
+  // position right after the current token (where the next scan starts)
+  const char_t* _cur;
+  // start of the current token, after leading whitespace
+  const char_t* _cur_lexeme_pos;
+  // text range of the current token; only updated for token kinds that carry text
+  xpath_lexer_string _cur_lexeme_contents;
+
+  lexeme_t _cur_lexeme;
+
+public:
+  explicit xpath_lexer(const char_t* query): _cur(query) {
+    next();
+  }
+
+  // current scan position
+  const char_t* state() const {
+    return _cur;
+  }
+
+  // Scans one token starting at _cur. When the input does not form a token, the kind is lex_none and,
+  // in most such cases, the scan position is not moved past the offending character.
+  void next() {
+    const char_t* cur = _cur;
+
+    while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
+
+    // save lexeme position for error reporting
+    _cur_lexeme_pos = cur;
+
+    switch (*cur) {
+    case 0:
+      _cur_lexeme = lex_eof;
+      break;
+
+    case '>':
+      if (*(cur+1) == '=') {
+        cur += 2;
+        _cur_lexeme = lex_greater_or_equal;
+      } else {
+        cur += 1;
+        _cur_lexeme = lex_greater;
+      }
+      break;
+
+    case '<':
+      if (*(cur+1) == '=') {
+        cur += 2;
+        _cur_lexeme = lex_less_or_equal;
+      } else {
+        cur += 1;
+        _cur_lexeme = lex_less;
+      }
+      break;
+
+    case '!':
+      // '!' is only valid as part of '!='
+      if (*(cur+1) == '=') {
+        cur += 2;
+        _cur_lexeme = lex_not_equal;
+      } else {
+        _cur_lexeme = lex_none;
+      }
+      break;
+
+    case '=':
+      cur += 1;
+      _cur_lexeme = lex_equal;
+
+      break;
+
+    case '+':
+      cur += 1;
+      _cur_lexeme = lex_plus;
+
+      break;
+
+    case '-':
+      cur += 1;
+      _cur_lexeme = lex_minus;
+
+      break;
+
+    case '*':
+      cur += 1;
+      _cur_lexeme = lex_multiply;
+
+      break;
+
+    case '|':
+      cur += 1;
+      _cur_lexeme = lex_union;
+
+      break;
+
+    case '$':
+      // variable reference: '$' followed by a name, optionally prefix:name; contents exclude the '$'
+      cur += 1;
+
+      if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
+        _cur_lexeme_contents.begin = cur;
+
+        while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+        if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // qname
+          cur++; // :
+
+          while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+        }
+
+        _cur_lexeme_contents.end = cur;
+
+        _cur_lexeme = lex_var_ref;
+      } else {
+        _cur_lexeme = lex_none;
+      }
+
+      break;
+
+    case '(':
+      cur += 1;
+      _cur_lexeme = lex_open_brace;
+
+      break;
+
+    case ')':
+      cur += 1;
+      _cur_lexeme = lex_close_brace;
+
+      break;
+
+    case '[':
+      cur += 1;
+      _cur_lexeme = lex_open_square_brace;
+
+      break;
+
+    case ']':
+      cur += 1;
+      _cur_lexeme = lex_close_square_brace;
+
+      break;
+
+    case ',':
+      cur += 1;
+      _cur_lexeme = lex_comma;
+
+      break;
+
+    case '/':
+      if (*(cur+1) == '/') {
+        cur += 2;
+        _cur_lexeme = lex_double_slash;
+      } else {
+        cur += 1;
+        _cur_lexeme = lex_slash;
+      }
+      break;
+
+    case '.':
+      // '..', a number starting with '.', or a single '.'
+      if (*(cur+1) == '.') {
+        cur += 2;
+        _cur_lexeme = lex_double_dot;
+      } else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) {
+        _cur_lexeme_contents.begin = cur; // .
+
+        ++cur;
+
+        while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+        _cur_lexeme_contents.end = cur;
+
+        _cur_lexeme = lex_number;
+      } else {
+        cur += 1;
+        _cur_lexeme = lex_dot;
+      }
+      break;
+
+    case '@':
+      cur += 1;
+      _cur_lexeme = lex_axis_attribute;
+
+      break;
+
+    case '"':
+    case '\'': {
+      // quoted string: runs up to the matching quote character; contents exclude both quotes
+      char_t terminator = *cur;
+
+      ++cur;
+
+      _cur_lexeme_contents.begin = cur;
+      while (*cur && *cur != terminator) cur++;
+      _cur_lexeme_contents.end = cur;
+
+      // reaching the end of input before the closing quote is an error
+      if (!*cur)
+        _cur_lexeme = lex_none;
+      else {
+        cur += 1;
+        _cur_lexeme = lex_quoted_string;
+      }
+
+      break;
+    }
+
+    case ':':
+      // ':' is only valid as part of '::'
+      if (*(cur+1) == ':') {
+        cur += 2;
+        _cur_lexeme = lex_double_colon;
+      } else {
+        _cur_lexeme = lex_none;
+      }
+      break;
+
+    default:
+      if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) {
+        // number: digits, optionally followed by '.' and more digits
+        _cur_lexeme_contents.begin = cur;
+
+        while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+
+        if (*cur == '.') {
+          cur++;
+
+          while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
+        }
+
+        _cur_lexeme_contents.end = cur;
+
+        _cur_lexeme = lex_number;
+      } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
+        // name: ncname, ncname:* or ncname:ncname; a ':' followed by anything else is left for the next token
+        _cur_lexeme_contents.begin = cur;
+
+        while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+
+        if (cur[0] == ':') {
+          if (cur[1] == '*') { // namespace test ncname:*
+            cur += 2; // :*
+          } else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) { // namespace test qname
+            cur++; // :
+
+            while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
+          }
+        }
+
+        _cur_lexeme_contents.end = cur;
+
+        _cur_lexeme = lex_string;
+      } else {
+        _cur_lexeme = lex_none;
+      }
+    }
+
+    _cur = cur;
+  }
+
+  // kind of the token last read
+  lexeme_t current() const {
+    return _cur_lexeme;
+  }
+
+  // start of the token last read
+  const char_t* current_pos() const {
+    return _cur_lexeme_pos;
+  }
+
+  // text of the token last read; only meaningful for the token kinds listed in the assertion
+  const xpath_lexer_string& contents() const {
+    assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
+
+    return _cur_lexeme_contents;
+  }
+};
+
+// Kinds of AST nodes; "left", "right" and "third" in the comments refer to the operand sub-expressions.
+enum ast_type_t {
+  ast_unknown,
+  ast_op_or,						// left or right
+  ast_op_and,						// left and right
+  ast_op_equal,					// left = right
+  ast_op_not_equal,				// left != right
+  ast_op_less,					// left < right
+  ast_op_greater,					// left > right
+  ast_op_less_or_equal,			// left <= right
+  ast_op_greater_or_equal,		// left >= right
+  ast_op_add,						// left + right
+  ast_op_subtract,				// left - right
+  ast_op_multiply,				// left * right
+  ast_op_divide,					// left / right
+  ast_op_mod,						// left % right
+  ast_op_negate,					// - left (unary minus)
+  ast_op_union,					// left | right
+  ast_predicate,					// apply predicate to set; next points to next predicate
+  ast_filter,						// select * from left where right
+  ast_string_constant,			// string constant
+  ast_number_constant,			// number constant
+  ast_variable,					// variable
+  ast_func_last,					// last()
+  ast_func_position,				// position()
+  ast_func_count,					// count(left)
+  ast_func_id,					// id(left)
+  ast_func_local_name_0,			// local-name()
+  ast_func_local_name_1,			// local-name(left)
+  ast_func_namespace_uri_0,		// namespace-uri()
+  ast_func_namespace_uri_1,		// namespace-uri(left)
+  ast_func_name_0,				// name()
+  ast_func_name_1,				// name(left)
+  ast_func_string_0,				// string()
+  ast_func_string_1,				// string(left)
+  ast_func_concat,				// concat(left, right, siblings)
+  ast_func_starts_with,			// starts-with(left, right)
+  ast_func_contains,				// contains(left, right)
+  ast_func_substring_before,		// substring-before(left, right)
+  ast_func_substring_after,		// substring-after(left, right)
+  ast_func_substring_2,			// substring(left, right)
+  ast_func_substring_3,			// substring(left, right, third)
+  ast_func_string_length_0,		// string-length()
+  ast_func_string_length_1,		// string-length(left)
+  ast_func_normalize_space_0,		// normalize-space()
+  ast_func_normalize_space_1,		// normalize-space(left)
+  ast_func_translate,				// translate(left, right, third)
+  ast_func_boolean,				// boolean(left)
+  ast_func_not,					// not(left)
+  ast_func_true,					// true()
+  ast_func_false,					// false()
+  ast_func_lang,					// lang(left)
+  ast_func_number_0,				// number()
+  ast_func_number_1,				// number(left)
+  ast_func_sum,					// sum(left)
+  ast_func_floor,					// floor(left)
+  ast_func_ceiling,				// ceiling(left)
+  ast_func_round,					// round(left)
+  ast_step,						// process set left with step
+  ast_step_root,					// select root node
+
+  // node kinds for optimized forms of the expressions above
+  ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
+  ast_opt_compare_attribute		// @name = 'string'
+};
+
+// Axis identifiers; xpath_ast_node keeps one in its _axis field for ast_step nodes.
+// The enumerators are also used as the template argument of axis_to_type.
+enum axis_t {
+  axis_ancestor,
+  axis_ancestor_or_self,
+  axis_attribute,
+  axis_child,
+  axis_descendant,
+  axis_descendant_or_self,
+  axis_following,
+  axis_following_sibling,
+  axis_namespace,
+  axis_parent,
+  axis_preceding,
+  axis_preceding_sibling,
+  axis_self
+};
+
+// Kinds of node test applied by a step.
+// NOTE(review): per-enumerator meanings below are inferred from the names and from the "node name/namespace/
+// node type/pi target" note on xpath_ast_node's nodetest data - confirm against the step evaluation code.
+enum nodetest_t {
+  nodetest_none,
+  nodetest_name,              // presumably: match by name
+  nodetest_type_node,         // presumably: node()
+  nodetest_type_comment,      // presumably: comment()
+  nodetest_type_pi,           // presumably: processing-instruction()
+  nodetest_type_text,         // presumably: text()
+  nodetest_pi,                // presumably: processing-instruction('target')
+  nodetest_all,               // presumably: *
+  nodetest_all_in_namespace   // presumably: prefix:*
+};
+
+// Classification of a predicate expression.
+// NOTE(review): the code that assigns and consumes these values is outside this excerpt; judging by the names
+// they distinguish the general case from position-invariant and constant predicates - confirm before relying on it.
+enum predicate_t {
+  predicate_default,
+  predicate_posinv,
+  predicate_constant,
+  predicate_constant_one
+};
+
+// Mode argument passed to xpath_ast_node::eval_node_set.
+// NOTE(review): presumably tells the evaluator whether all nodes, any single node, or the first node in
+// document order is needed - the evaluator itself is outside this excerpt, confirm there.
+enum nodeset_eval_t {
+  nodeset_eval_all,
+  nodeset_eval_any,
+  nodeset_eval_first
+};
+
+// Wraps an axis_t value in a distinct type: axis_to_type<N>::axis is a constant equal to N.
+template <axis_t N> struct axis_to_type {
+  static const axis_t axis;
+};
+
+// out-of-class definition of the static member for every instantiation
+template <axis_t N> const axis_t axis_to_type<N>::axis = N;
+
+class xpath_ast_node
+{
+private:
+  // node type
+  char _type;
+  char _rettype;
+
+  // for ast_step
+  char _axis;
+
+  // for ast_step/ast_predicate/ast_filter
+  char _test;
+
+  // tree node structure
+  xpath_ast_node* _left;
+  xpath_ast_node* _right;
+  xpath_ast_node* _next;
+
+  union {
+    // value for ast_string_constant
+    const char_t* string;
+    // value for ast_number_constant
+    double number;
+    // variable for ast_variable
+    xpath_variable* variable;
+    // node test for ast_step (node name/namespace/node type/pi target)
+    const char_t* nodetest;
+    // table for ast_opt_translate_table
+    const unsigned char* table;
+  } _data;
+
+  xpath_ast_node(const xpath_ast_node&);
+  xpath_ast_node& operator=(const xpath_ast_node&);
+
+  // Evaluates an equality-style comparison between two expressions; comp supplies the predicate
+  // (the callers in this class pass equal_to() and not_equal_to()).
+  //  - neither operand is a node set: compare as booleans, else as numbers, else as strings,
+  //    depending on which of those types either operand has;
+  //  - both operands are node sets: true as soon as one pair of node string values satisfies comp;
+  //  - exactly one operand is a node set: the scalar is compared against the node set converted
+  //    to boolean, or against each node's string value (converted to a number when needed).
+  // Any other type combination trips the assertion and yields false.
+  template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
+    xpath_value_type lt = lhs->rettype();
+    xpath_value_type rt = rhs->rettype();
+
+    const bool lhs_is_set = (lt == xpath_type_node_set);
+    const bool rhs_is_set = (rt == xpath_type_node_set);
+
+    if (lhs_is_set && rhs_is_set) {
+      xpath_allocator_capture sets_scope(stack.result);
+
+      xpath_node_set_raw left_nodes = lhs->eval_node_set(c, stack, nodeset_eval_all);
+      xpath_node_set_raw right_nodes = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+      for (const xpath_node* l = left_nodes.begin(); l != left_nodes.end(); ++l) {
+        for (const xpath_node* r = right_nodes.begin(); r != right_nodes.end(); ++r) {
+          xpath_allocator_capture pair_scope(stack.result);
+
+          if (comp(string_value(*l, stack.result), string_value(*r, stack.result)))
+            return true;
+        }
+      }
+
+      return false;
+    }
+
+    if (!lhs_is_set && !rhs_is_set) {
+      if (lt == xpath_type_boolean || rt == xpath_type_boolean)
+        return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+
+      if (lt == xpath_type_number || rt == xpath_type_number)
+        return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+
+      if (lt == xpath_type_string || rt == xpath_type_string) {
+        xpath_allocator_capture strings_scope(stack.result);
+
+        xpath_string left_text = lhs->eval_string(c, stack);
+        xpath_string right_text = rhs->eval_string(c, stack);
+
+        return comp(left_text, right_text);
+      }
+    } else {
+      // exactly one operand is a node set: make sure it ends up on the right-hand side
+      if (lhs_is_set) {
+        swap(lhs, rhs);
+        swap(lt, rt);
+      }
+
+      if (lt == xpath_type_boolean)
+        return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
+
+      if (lt == xpath_type_number) {
+        xpath_allocator_capture set_scope(stack.result);
+
+        double scalar = lhs->eval_number(c, stack);
+        xpath_node_set_raw nodes = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+        for (const xpath_node* node = nodes.begin(); node != nodes.end(); ++node) {
+          xpath_allocator_capture node_scope(stack.result);
+
+          if (comp(scalar, convert_string_to_number(string_value(*node, stack.result).c_str())))
+            return true;
+        }
+
+        return false;
+      }
+
+      if (lt == xpath_type_string) {
+        xpath_allocator_capture set_scope(stack.result);
+
+        xpath_string scalar = lhs->eval_string(c, stack);
+        xpath_node_set_raw nodes = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+        for (const xpath_node* node = nodes.begin(); node != nodes.end(); ++node) {
+          xpath_allocator_capture node_scope(stack.result);
+
+          if (comp(scalar, string_value(*node, stack.result)))
+            return true;
+        }
+
+        return false;
+      }
+    }
+
+    assert(!"Wrong types");
+    return false;
+  }
+
+  // Tells whether node collection may stop after the first accepted node.
+  // For a set of type_sorted this holds for every mode except nodeset_eval_all;
+  // for any other set type it holds only for nodeset_eval_any.
+  static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) {
+    if (type == xpath_node_set::type_sorted)
+      return eval != nodeset_eval_all;
+
+    return eval == nodeset_eval_any;
+  }
+
+  // Evaluates a relational comparison between two expressions; comp supplies the predicate
+  // (the callers in this class pass less() and less_equal(), swapping operands for > and >=).
+  // Operands are always compared as numbers. A node-set operand contributes the numeric value
+  // of each node's string value, and the result is true as soon as one combination satisfies comp.
+  template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) {
+    const xpath_value_type lt = lhs->rettype();
+    const xpath_value_type rt = rhs->rettype();
+
+    const bool lhs_is_set = (lt == xpath_type_node_set);
+    const bool rhs_is_set = (rt == xpath_type_node_set);
+
+    // two scalars
+    if (!lhs_is_set && !rhs_is_set)
+      return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
+
+    // two node sets
+    if (lhs_is_set && rhs_is_set) {
+      xpath_allocator_capture sets_scope(stack.result);
+
+      xpath_node_set_raw left_nodes = lhs->eval_node_set(c, stack, nodeset_eval_all);
+      xpath_node_set_raw right_nodes = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+      for (const xpath_node* l = left_nodes.begin(); l != left_nodes.end(); ++l) {
+        xpath_allocator_capture left_scope(stack.result);
+
+        double left_value = convert_string_to_number(string_value(*l, stack.result).c_str());
+
+        for (const xpath_node* r = right_nodes.begin(); r != right_nodes.end(); ++r) {
+          xpath_allocator_capture right_scope(stack.result);
+
+          if (comp(left_value, convert_string_to_number(string_value(*r, stack.result).c_str())))
+            return true;
+        }
+      }
+
+      return false;
+    }
+
+    // scalar on the left, node set on the right
+    if (rhs_is_set) {
+      xpath_allocator_capture set_scope(stack.result);
+
+      double left_value = lhs->eval_number(c, stack);
+      xpath_node_set_raw right_nodes = rhs->eval_node_set(c, stack, nodeset_eval_all);
+
+      for (const xpath_node* r = right_nodes.begin(); r != right_nodes.end(); ++r) {
+        xpath_allocator_capture node_scope(stack.result);
+
+        if (comp(left_value, convert_string_to_number(string_value(*r, stack.result).c_str())))
+          return true;
+      }
+
+      return false;
+    }
+
+    // node set on the left, scalar on the right (the only combination left)
+    xpath_allocator_capture set_scope(stack.result);
+
+    xpath_node_set_raw left_nodes = lhs->eval_node_set(c, stack, nodeset_eval_all);
+    double right_value = rhs->eval_number(c, stack);
+
+    for (const xpath_node* l = left_nodes.begin(); l != left_nodes.end(); ++l) {
+      xpath_allocator_capture node_scope(stack.result);
+
+      if (comp(convert_string_to_number(string_value(*l, stack.result).c_str()), right_value))
+        return true;
+    }
+
+    return false;
+  }
+
+  // Filters the tail of ns (the nodes from index first onwards) in place with a predicate whose
+  // result type is not a number: a node is kept when expr evaluates to true for it.
+  // Each node is evaluated with a 1-based position and the size of the tail as context.
+  // When once is set, filtering stops right after the first node that is kept.
+  static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() != xpath_type_number);
+
+    size_t context_size = ns.size() - first;
+
+    xpath_node* write = ns.begin() + first;
+    xpath_node* read = write;
+    size_t position = 1;
+
+    // compact the kept nodes towards the start of the tail
+    while (read != ns.end()) {
+      xpath_context candidate(*read, position, context_size);
+
+      if (expr->eval_boolean(candidate, stack)) {
+        *write = *read;
+        ++write;
+
+        if (once) break;
+      }
+
+      ++read;
+      ++position;
+    }
+
+    ns.truncate(write);
+  }
+
+  // Filters the tail of ns (the nodes from index first onwards) in place with a numeric predicate:
+  // a node is kept when expr, evaluated with that node as context, equals the node's 1-based
+  // position within the tail. When once is set, filtering stops after the first node that is kept.
+  static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() == xpath_type_number);
+
+    size_t context_size = ns.size() - first;
+
+    xpath_node* write = ns.begin() + first;
+    xpath_node* read = write;
+    size_t position = 1;
+
+    // compact the kept nodes towards the start of the tail
+    while (read != ns.end()) {
+      xpath_context candidate(*read, position, context_size);
+
+      if (expr->eval_number(candidate, stack) == position) {
+        *write = *read;
+        ++write;
+
+        if (once) break;
+      }
+
+      ++read;
+      ++position;
+    }
+
+    ns.truncate(write);
+  }
+
+  // Applies a numeric predicate that is evaluated only once for the whole tail of ns (the nodes
+  // from index first onwards). The expression is evaluated against an empty context node; if the
+  // result is an integer within [1, tail size], the node at that 1-based position becomes the only
+  // node left in the tail, otherwise the tail is emptied.
+  static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) {
+    assert(ns.size() >= first);
+    assert(expr->rettype() == xpath_type_number);
+
+    size_t context_size = ns.size() - first;
+
+    xpath_node* tail = ns.begin() + first;
+
+    xpath_context c(xpath_node(), 1, context_size);
+
+    double wanted = expr->eval_number(c, stack);
+
+    const bool in_range = (wanted >= 1.0 && wanted <= context_size);
+
+    if (in_range) {
+      size_t slot = static_cast<size_t>(wanted);
+
+      // fractional positions never match any node
+      if (wanted == slot) {
+        xpath_node chosen = tail[slot - 1];
+
+        *tail = chosen;
+        ++tail;
+      }
+    }
+
+    ns.truncate(tail);
+  }
+
+  // Applies this single filter/predicate node to the tail of ns that starts at index first.
+  // The expression to evaluate is _right; the strategy is picked from _test and, failing that,
+  // from the static result type of the expression. Does nothing when the tail is empty.
+  void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) {
+    if (ns.size() == first) return;
+
+    assert(_type == ast_filter || _type == ast_predicate);
+
+    const bool constant_position = (_test == predicate_constant || _test == predicate_constant_one);
+
+    if (constant_position) {
+      apply_predicate_number_const(ns, first, _right, stack);
+      return;
+    }
+
+    if (_right->rettype() == xpath_type_number) {
+      apply_predicate_number(ns, first, _right, stack, once);
+    } else {
+      apply_predicate_boolean(ns, first, _right, stack, once);
+    }
+  }
+
+  // Runs the chain of predicates hanging off _right (linked through _next) over the tail of ns
+  // that starts at index first. Only the last predicate of the chain may stop early, and only
+  // when eval_once allows it for the set's current type and the requested evaluation mode.
+  void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) {
+    if (ns.size() == first) return;
+
+    const bool stop_early = eval_once(ns.type(), eval);
+
+    xpath_ast_node* pred = _right;
+
+    while (pred) {
+      const bool is_last = !pred->_next;
+
+      pred->apply_predicate(ns, first, stack, is_last && stop_early);
+
+      pred = pred->_next;
+    }
+  }
+
+  // Tests attribute a (owned by parent) against this step's node test and, if it passes,
+  // appends it to ns. Returns true when the attribute was appended, false otherwise.
+  // Every accepted attribute must also satisfy is_xpath_attribute; node tests that cannot match
+  // an attribute simply reject it.
+  bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) {
+    assert(a);
+
+    const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
+
+    bool accepted = false;
+
+    switch (_test) {
+    case nodetest_name:
+      accepted = strequal(name, _data.nodetest) && is_xpath_attribute(name);
+      break;
+
+    case nodetest_type_node:
+    case nodetest_all:
+      accepted = is_xpath_attribute(name);
+      break;
+
+    case nodetest_all_in_namespace:
+      accepted = starts_with(name, _data.nodetest) && is_xpath_attribute(name);
+      break;
+
+    default:
+      break;
+    }
+
+    if (!accepted) return false;
+
+    ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
+    return true;
+  }
+
+  // Tests node n against this step's node test (_test, with _data.nodetest as the name or prefix
+  // where one is needed) and, if it passes, appends it to ns.
+  // Returns true when the node was appended, false otherwise.
+  // A _test value outside the handled node tests is a programming error: it trips the assertion
+  // and the node is rejected.
+  bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) {
+    assert(n);
+
+    xml_node_type type = PUGI__NODETYPE(n);
+
+    bool accepted = false;
+
+    switch (_test) {
+    case nodetest_name:
+      accepted = type == node_element && n->name && strequal(n->name, _data.nodetest);
+      break;
+
+    case nodetest_type_node:
+      accepted = true;
+      break;
+
+    case nodetest_type_comment:
+      accepted = type == node_comment;
+      break;
+
+    case nodetest_type_text:
+      accepted = type == node_pcdata || type == node_cdata;
+      break;
+
+    case nodetest_type_pi:
+      accepted = type == node_pi;
+      break;
+
+    case nodetest_pi:
+      accepted = type == node_pi && n->name && strequal(n->name, _data.nodetest);
+      break;
+
+    case nodetest_all:
+      accepted = type == node_element;
+      break;
+
+    case nodetest_all_in_namespace:
+      accepted = type == node_element && n->name && starts_with(n->name, _data.nodetest);
+      break;
+
+    default:
+      // the switch is over the node test, not the axis, so say so in the diagnostic
+      assert(!"Unknown node test");
+    }
+
+    if (!accepted) return false;
+
+    ns.push_back(xml_node(n), alloc);
+    return true;
+  }
+
+  template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) { // collects into ns the nodes reachable from node n along axis T::axis that pass step_push's node test
+    const axis_t axis = T::axis;
+    // "step_push(...) & once" is the early-exit test used throughout: true only when a node was just accepted and a single result was requested
+    switch (axis) {
+    case axis_attribute: {
+      for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
+        if (step_push(ns, a, n, alloc) & once)
+          return;
+
+      break;
+    }
+
+    case axis_child: {
+      for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
+        if (step_push(ns, c, alloc) & once)
+          return;
+
+      break;
+    }
+
+    case axis_descendant:
+    case axis_descendant_or_self: {
+      if (axis == axis_descendant_or_self)
+        if (step_push(ns, n, alloc) & once)
+          return;
+      // non-recursive pre-order walk of the subtree below n
+      xml_node_struct* cur = n->first_child;
+
+      while (cur) {
+        if (step_push(ns, cur, alloc) & once)
+          return;
+
+        if (cur->first_child)
+          cur = cur->first_child;
+        else {
+          while (!cur->next_sibling) { // no sibling left on this level: climb until one is found
+            cur = cur->parent;
+
+            if (cur == n) return; // climbed back to the start node: the subtree is exhausted
+          }
+
+          cur = cur->next_sibling;
+        }
+      }
+
+      break;
+    }
+
+    case axis_following_sibling: {
+      for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
+        if (step_push(ns, c, alloc) & once)
+          return;
+
+      break;
+    }
+
+    case axis_preceding_sibling: { // NOTE(review): relies on prev_sibling_c being circular (first child's link wraps to the last child), so a node without next_sibling marks the wrap-around - confirm against xml_node_struct
+      for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
+        if (step_push(ns, c, alloc) & once)
+          return;
+
+      break;
+    }
+
+    case axis_following: {
+      xml_node_struct* cur = n;
+
+      // exit from this node so that we don't include descendants
+      while (!cur->next_sibling) {
+        cur = cur->parent;
+
+        if (!cur) return;
+      }
+
+      cur = cur->next_sibling;
+      // pre-order walk from here to the end of the tree, climbing through ancestors whenever a level runs out of siblings
+      while (cur) {
+        if (step_push(ns, cur, alloc) & once)
+          return;
+
+        if (cur->first_child)
+          cur = cur->first_child;
+        else {
+          while (!cur->next_sibling) {
+            cur = cur->parent;
+
+            if (!cur) return;
+          }
+
+          cur = cur->next_sibling;
+        }
+      }
+
+      break;
+    }
+
+    case axis_preceding: {
+      xml_node_struct* cur = n;
+
+      // exit from this node so that we don't include descendants
+      while (!cur->prev_sibling_c->next_sibling) {
+        cur = cur->parent;
+
+        if (!cur) return;
+      }
+
+      cur = cur->prev_sibling_c;
+      // backwards walk: dive through first_child->prev_sibling_c before emitting, emit leaves, and emit a parent on the way up only if it is not an ancestor of n
+      while (cur) {
+        if (cur->first_child)
+          cur = cur->first_child->prev_sibling_c; // presumably the last child, given the circular link noted above
+        else {
+          // leaf node, can't be ancestor
+          if (step_push(ns, cur, alloc) & once)
+            return;
+
+          while (!cur->prev_sibling_c->next_sibling) {
+            cur = cur->parent;
+
+            if (!cur) return;
+
+            if (!node_is_ancestor(cur, n))
+              if (step_push(ns, cur, alloc) & once)
+                return;
+          }
+
+          cur = cur->prev_sibling_c;
+        }
+      }
+
+      break;
+    }
+
+    case axis_ancestor:
+    case axis_ancestor_or_self: {
+      if (axis == axis_ancestor_or_self)
+        if (step_push(ns, n, alloc) & once)
+          return;
+      // follow the parent links up to the root
+      xml_node_struct* cur = n->parent;
+
+      while (cur) {
+        if (step_push(ns, cur, alloc) & once)
+          return;
+
+        cur = cur->parent;
+      }
+
+      break;
+    }
+
+    case axis_self: {
+      step_push(ns, n, alloc); // single candidate, so once is irrelevant
+
+      break;
+    }
+
+    case axis_parent: {
+      if (n->parent)
+        step_push(ns, n->parent, alloc); // single candidate, so once is irrelevant
+
+      break;
+    }
+
+    default:
+      assert(!"Unimplemented axis");
+    }
+  }
+
+  template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) { // attribute-context variant: a is the context attribute, p its owning element; accepted nodes along axis T::axis are appended to ns
+    const axis_t axis = T::axis;
+    // only the axes listed below are handled for an attribute context; any other axis reaches the assertion in the default branch
+    switch (axis) {
+    case axis_ancestor:
+    case axis_ancestor_or_self: {
+      if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
+        if (step_push(ns, a, p, alloc) & once)
+          return;
+      // the remaining candidates are the owning element and its ancestors
+      xml_node_struct* cur = p;
+
+      while (cur) {
+        if (step_push(ns, cur, alloc) & once)
+          return;
+
+        cur = cur->parent;
+      }
+
+      break;
+    }
+
+    case axis_descendant_or_self:
+    case axis_self: {
+      if (_test == nodetest_type_node) // reject attributes based on principal node type test
+        step_push(ns, a, p, alloc);
+
+      break;
+    }
+
+    case axis_following: {
+      xml_node_struct* cur = p;
+      // pre-order walk that advances before testing: p itself is never pushed, but its descendants and everything after them are
+      while (cur) {
+        if (cur->first_child)
+          cur = cur->first_child;
+        else {
+          while (!cur->next_sibling) {
+            cur = cur->parent;
+
+            if (!cur) return;
+          }
+
+          cur = cur->next_sibling;
+        }
+
+        if (step_push(ns, cur, alloc) & once)
+          return;
+      }
+
+      break;
+    }
+
+    case axis_parent: {
+      step_push(ns, p, alloc); // the only candidate is the owning element
+
+      break;
+    }
+
+    case axis_preceding: {
+      // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
+      step_fill(ns, p, alloc, once, v);
+      break;
+    }
+
+    default:
+      assert(!"Unimplemented axis");
+    }
+  }
+
+  // Dispatches a step over an xpath_node context: node contexts go to the node overload of
+  // step_fill, attribute contexts (with a known parent) go to the attribute overload, but only
+  // for the axes that overload implements. Any other context contributes nothing to ns.
+  template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) {
+    const axis_t axis = T::axis;
+
+    if (xn.node()) {
+      step_fill(ns, xn.node().internal_object(), alloc, once, v);
+      return;
+    }
+
+    const bool axis_has_attributes =
+      axis == axis_ancestor ||
+      axis == axis_ancestor_or_self ||
+      axis == axis_descendant_or_self ||
+      axis == axis_following ||
+      axis == axis_parent ||
+      axis == axis_preceding ||
+      axis == axis_self;
+
+    if (!axis_has_attributes) return;
+
+    if (xn.attribute() && xn.parent())
+      step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
+  }
+
+  // Evaluates this location step along axis T::axis and returns the resulting node set.
+  // The context is either every node produced by _left (if present) or the context node c.n.
+  // Predicates chained off _right are applied to the nodes contributed by each context node.
+  // The set's type records the ordering: reverse axes yield type_sorted_reverse, other axes
+  // type_sorted, and the type drops to type_unsorted once a second context node contributes
+  // (the self axis keeps the ordering of the input set instead).
+  template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) {
+    const axis_t axis = T::axis;
+
+    const bool axis_reverse =
+      axis == axis_ancestor ||
+      axis == axis_ancestor_or_self ||
+      axis == axis_preceding ||
+      axis == axis_preceding_sibling;
+
+    const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
+
+    // decide whether collecting a single node per context node is enough
+    bool once;
+
+    if (axis == axis_attribute && _test == nodetest_name)
+      once = true;
+    else if (!_right)
+      once = eval_once(axis_type, eval);
+    else
+      once = !_right->_next && _right->_test == predicate_constant_one;
+
+    xpath_node_set_raw ns;
+    ns.set_type(axis_type);
+
+    if (!_left) {
+      step_fill(ns, c.n, stack.result, once, v);
+
+      if (_right) apply_predicates(ns, 0, stack, eval);
+    } else {
+      xpath_node_set_raw source = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+      // self axis preserves the original order
+      if (axis == axis_self) ns.set_type(source.type());
+
+      for (const xpath_node* ctx = source.begin(); ctx != source.end(); ++ctx) {
+        size_t filled = ns.size();
+
+        // each axis yields nodes in a definite order for one context node, but nothing is
+        // guaranteed once a second context node appends to a non-empty set
+        if (axis != axis_self && filled != 0) ns.set_type(xpath_node_set::type_unsorted);
+
+        step_fill(ns, *ctx, stack.result, once, v);
+
+        if (_right) apply_predicates(ns, filled, stack, eval);
+      }
+    }
+
+    // child, attribute and self axes always generate a unique set of nodes; for the other axes a
+    // set that stayed sorted also stayed unique, because the traversals never visit a node twice
+    const bool unique_by_axis = (axis == axis_child || axis == axis_attribute || axis == axis_self);
+
+    if (!unique_by_axis && ns.type() == xpath_node_set::type_unsorted)
+      ns.remove_duplicates();
+
+    return ns;
+  }
+
+public:
+  // Leaf holding a string constant; type must be ast_string_constant.
+  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value)
+    : _type(static_cast<char>(type))
+    , _rettype(static_cast<char>(rettype_))
+    , _axis(0)
+    , _test(0)
+    , _left(0)
+    , _right(0)
+    , _next(0)
+  {
+    assert(type == ast_string_constant);
+    _data.string = value;
+  }
+
+  // Leaf holding a numeric constant; type must be ast_number_constant.
+  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value)
+    : _type(static_cast<char>(type))
+    , _rettype(static_cast<char>(rettype_))
+    , _axis(0)
+    , _test(0)
+    , _left(0)
+    , _right(0)
+    , _next(0)
+  {
+    assert(type == ast_number_constant);
+    _data.number = value;
+  }
+
+  // Leaf referring to a variable; type must be ast_variable.
+  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value)
+    : _type(static_cast<char>(type))
+    , _rettype(static_cast<char>(rettype_))
+    , _axis(0)
+    , _test(0)
+    , _left(0)
+    , _right(0)
+    , _next(0)
+  {
+    assert(type == ast_variable);
+    _data.variable = value;
+  }
+
+  // Generic node with up to two operands; _data is left untouched.
+  xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
+    : _type(static_cast<char>(type))
+    , _rettype(static_cast<char>(rettype_))
+    , _axis(0)
+    , _test(0)
+    , _left(left)
+    , _right(right)
+    , _next(0)
+  {
+  }
+
+  // Location step (type must be ast_step): always returns a node set; contents is the name,
+  // prefix or target used by the node test.
+  xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
+    : _type(static_cast<char>(type))
+    , _rettype(xpath_type_node_set)
+    , _axis(static_cast<char>(axis))
+    , _test(static_cast<char>(test))
+    , _left(left)
+    , _right(0)
+    , _next(0)
+  {
+    assert(type == ast_step);
+    _data.nodetest = contents;
+  }
+
+  // Filter or predicate (type must be ast_filter or ast_predicate): always returns a node set;
+  // test records the predicate classification.
+  xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
+    : _type(static_cast<char>(type))
+    , _rettype(xpath_type_node_set)
+    , _axis(0)
+    , _test(static_cast<char>(test))
+    , _left(left)
+    , _right(right)
+    , _next(0)
+  {
+    assert(type == ast_filter || type == ast_predicate);
+  }
+
+  // Replaces the sibling link (_next) of this node.
+  void set_next(xpath_ast_node* value)
+  {
+    this->_next = value;
+  }
+
+  // Replaces the right operand (_right) of this node.
+  void set_right(xpath_ast_node* value)
+  {
+    this->_right = value;
+  }
+
+  bool eval_boolean(const xpath_context& c, const xpath_stack& stack) { // evaluates this node as a boolean for context c; node types without a dedicated case are converted from their static _rettype
+    switch (_type) {
+    case ast_op_or:
+      return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
+
+    case ast_op_and:
+      return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
+
+    case ast_op_equal:
+      return compare_eq(_left, _right, c, stack, equal_to());
+
+    case ast_op_not_equal:
+      return compare_eq(_left, _right, c, stack, not_equal_to());
+
+    case ast_op_less:
+      return compare_rel(_left, _right, c, stack, less());
+
+    case ast_op_greater:
+      return compare_rel(_right, _left, c, stack, less()); // a > b is evaluated as b < a
+
+    case ast_op_less_or_equal:
+      return compare_rel(_left, _right, c, stack, less_equal());
+
+    case ast_op_greater_or_equal:
+      return compare_rel(_right, _left, c, stack, less_equal()); // a >= b is evaluated as b <= a
+
+    case ast_func_starts_with: {
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_string lr = _left->eval_string(c, stack);
+      xpath_string rr = _right->eval_string(c, stack);
+
+      return starts_with(lr.c_str(), rr.c_str());
+    }
+
+    case ast_func_contains: {
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_string lr = _left->eval_string(c, stack);
+      xpath_string rr = _right->eval_string(c, stack);
+
+      return find_substring(lr.c_str(), rr.c_str()) != 0;
+    }
+
+    case ast_func_boolean:
+      return _left->eval_boolean(c, stack);
+
+    case ast_func_not:
+      return !_left->eval_boolean(c, stack);
+
+    case ast_func_true:
+      return true;
+
+    case ast_func_false:
+      return false;
+
+    case ast_func_lang: {
+      if (c.n.attribute()) return false; // attribute context nodes never match
+
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_string lang = _left->eval_string(c, stack);
+      // walk from the context node towards the root; the first xml:lang attribute found decides the result
+      for (xml_node n = c.n.node(); n; n = n.parent()) {
+        xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
+
+        if (a) {
+          const char_t* value = a.value();
+
+          // strnicmp / strncasecmp is not portable
+          for (const char_t* lit = lang.c_str(); *lit; ++lit) {
+            if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
+            ++value;
+          }
+          // the requested language is a prefix of the attribute value: accept an exact match or a '-' right after it
+          return *value == 0 || *value == '-';
+        }
+      }
+
+      return false;
+    }
+
+    case ast_opt_compare_attribute: {
+      const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
+      // look up the attribute named by the left node's node test on the context node and compare its value with the string
+      xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
+
+      return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
+    }
+
+    case ast_variable: {
+      assert(_rettype == _data.variable->type());
+
+      if (_rettype == xpath_type_boolean)
+        return _data.variable->get_boolean();
+
+      // fallthrough to type conversion
+    }
+
+    default: { // convert from the node's static result type to boolean
+      switch (_rettype) {
+      case xpath_type_number:
+        return convert_number_to_boolean(eval_number(c, stack));
+
+      case xpath_type_string: {
+        xpath_allocator_capture cr(stack.result);
+
+        return !eval_string(c, stack).empty(); // a string converts to true when it is not empty
+      }
+
+      case xpath_type_node_set: {
+        xpath_allocator_capture cr(stack.result);
+
+        return !eval_node_set(c, stack, nodeset_eval_any).empty(); // only emptiness matters, hence nodeset_eval_any
+      }
+
+      default:
+        assert(!"Wrong expression for return type boolean");
+        return false;
+      }
+    }
+    }
+  }
+
+  // Evaluates this node as a number for context c. Arithmetic operators and the numeric
+  // functions are handled directly; every other node type is evaluated according to its static
+  // result type (_rettype) and the result converted to a number.
+  double eval_number(const xpath_context& c, const xpath_stack& stack) {
+    switch (_type) {
+    case ast_op_add:
+      return _left->eval_number(c, stack) + _right->eval_number(c, stack);
+
+    case ast_op_subtract:
+      return _left->eval_number(c, stack) - _right->eval_number(c, stack);
+
+    case ast_op_multiply:
+      return _left->eval_number(c, stack) * _right->eval_number(c, stack);
+
+    case ast_op_divide:
+      return _left->eval_number(c, stack) / _right->eval_number(c, stack);
+
+    case ast_op_mod:
+      return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
+
+    case ast_op_negate:
+      return -_left->eval_number(c, stack);
+
+    case ast_number_constant:
+      return _data.number;
+
+    case ast_func_last:
+      return static_cast<double>(c.size);
+
+    case ast_func_position:
+      return static_cast<double>(c.position);
+
+    case ast_func_count: {
+      xpath_allocator_capture scope(stack.result);
+
+      return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
+    }
+
+    case ast_func_string_length_0: {
+      xpath_allocator_capture scope(stack.result);
+
+      return static_cast<double>(string_value(c.n, stack.result).length());
+    }
+
+    case ast_func_string_length_1: {
+      xpath_allocator_capture scope(stack.result);
+
+      return static_cast<double>(_left->eval_string(c, stack).length());
+    }
+
+    case ast_func_number_0: {
+      xpath_allocator_capture scope(stack.result);
+
+      return convert_string_to_number(string_value(c.n, stack.result).c_str());
+    }
+
+    case ast_func_number_1:
+      return _left->eval_number(c, stack);
+
+    case ast_func_sum: {
+      xpath_allocator_capture scope(stack.result);
+
+      double total = 0;
+
+      xpath_node_set_raw nodes = _left->eval_node_set(c, stack, nodeset_eval_all);
+
+      for (const xpath_node* node = nodes.begin(); node != nodes.end(); ++node) {
+        xpath_allocator_capture node_scope(stack.result);
+
+        total += convert_string_to_number(string_value(*node, stack.result).c_str());
+      }
+
+      return total;
+    }
+
+    case ast_func_floor: {
+      double value = _left->eval_number(c, stack);
+
+      // NaN (the only value that differs from itself) is passed through untouched
+      if (value != value) return value;
+
+      return floor(value);
+    }
+
+    case ast_func_ceiling: {
+      double value = _left->eval_number(c, stack);
+
+      // NaN (the only value that differs from itself) is passed through untouched
+      if (value != value) return value;
+
+      return ceil(value);
+    }
+
+    case ast_func_round:
+      return round_nearest_nzero(_left->eval_number(c, stack));
+
+    case ast_variable: {
+      assert(_rettype == _data.variable->type());
+
+      if (_rettype == xpath_type_number)
+        return _data.variable->get_number();
+
+      // fallthrough to type conversion
+    }
+
+    default: {
+      switch (_rettype) {
+      case xpath_type_boolean:
+        return eval_boolean(c, stack) ? 1 : 0;
+
+      // strings and node sets both go through their string value
+      case xpath_type_string:
+      case xpath_type_node_set: {
+        xpath_allocator_capture scope(stack.result);
+
+        return convert_string_to_number(eval_string(c, stack).c_str());
+      }
+
+      default:
+        assert(!"Wrong expression for return type number");
+        return 0;
+      }
+    }
+    }
+  }
+
+  // Evaluates concat(): the first argument is _left, the remaining ones form the chain that
+  // starts at _right and is linked through _next. All arguments are evaluated with the result and
+  // temp allocators swapped, so the pieces are allocated from stack.temp; only the final string
+  // is allocated from stack.result.
+  xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) {
+    assert(_type == ast_func_concat);
+
+    xpath_allocator_capture temp_scope(stack.temp);
+
+    // number of arguments: _left plus everything chained off _right
+    size_t count = 1;
+
+    xpath_ast_node* arg = _right;
+
+    while (arg) {
+      count++;
+      arg = arg->_next;
+    }
+
+    // storage for the evaluated pieces: a small in-place array, or a temp allocation when there are more
+    xpath_string static_buffer[4];
+    xpath_string* pieces = static_buffer;
+
+    if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) {
+      pieces = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
+      assert(pieces);
+    }
+
+    // evaluate every argument onto the temporary stack
+    xpath_stack swapped_stack = {stack.temp, stack.result};
+
+    pieces[0] = _left->eval_string(c, swapped_stack);
+
+    size_t filled = 1;
+
+    arg = _right;
+
+    while (arg) {
+      pieces[filled] = arg->eval_string(c, swapped_stack);
+
+      arg = arg->_next;
+      ++filled;
+    }
+
+    assert(filled == count);
+
+    // total length of the concatenation
+    size_t length = 0;
+
+    for (size_t i = 0; i < count; ++i)
+      length += pieces[i].length();
+
+    // build the final, zero-terminated string in the result allocator
+    char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
+    assert(result);
+
+    char_t* out = result;
+
+    for (size_t j = 0; j < count; ++j) {
+      const char_t* in = pieces[j].c_str();
+
+      while (*in) {
+        *out = *in;
+        ++out;
+        ++in;
+      }
+    }
+
+    *out = 0;
+
+    return xpath_string::from_heap_preallocated(result, out);
+  }
+
+  xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) {
+    switch (_type) {
+    case ast_string_constant:
+      return xpath_string::from_const(_data.string);
+
+    case ast_func_local_name_0: {
+      xpath_node na = c.n;
+
+      return xpath_string::from_const(local_name(na));
+    }
+
+    case ast_func_local_name_1: {
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+      xpath_node na = ns.first();
+
+      return xpath_string::from_const(local_name(na));
+    }
+
+    case ast_func_name_0: {
+      xpath_node na = c.n;
+
+      return xpath_string::from_const(qualified_name(na));
+    }
+
+    case ast_func_name_1: {
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+      xpath_node na = ns.first();
+
+      return xpath_string::from_const(qualified_name(na));
+    }
+
+    case ast_func_namespace_uri_0: {
+      xpath_node na = c.n;
+
+      return xpath_string::from_const(namespace_uri(na));
+    }
+
+    case ast_func_namespace_uri_1: {
+      xpath_allocator_capture cr(stack.result);
+
+      xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
+      xpath_node na = ns.first();
+
+      return xpath_string::from_const(namespace_uri(na));
+    }
+
+    case ast_func_string_0:
+      return string_value(c.n, stack.result);
+
+    case ast_func_string_1:
+      return _left->eval_string(c, stack);
+
+    case ast_func_concat:
+      return eval_string_concat(c, stack);
+
+    case ast_func_substring_before: {
+      xpath_allocator_capture cr(stack.temp);
+
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      xpath_string s = _left->eval_string(c, swapped_stack);
+      xpath_string p = _right->eval_string(c, swapped_stack);
+
+      const char_t* pos = find_substring(s.c_str(), p.c_str());
+
+      return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
+    }
+
+    case ast_func_substring_after: {
+      xpath_allocator_capture cr(stack.temp);
+
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      xpath_string s = _left->eval_string(c, swapped_stack);
+      xpath_string p = _right->eval_string(c, swapped_stack);
+
+      const char_t* pos = find_substring(s.c_str(), p.c_str());
+      if (!pos) return xpath_string();
+
+      const char_t* rbegin = pos + p.length();
+      const char_t* rend = s.c_str() + s.length();
+
+      return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+    }
+
+    case ast_func_substring_2: {
+      xpath_allocator_capture cr(stack.temp);
+
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      xpath_string s = _left->eval_string(c, swapped_stack);
+      size_t s_length = s.length();
+
+      double first = round_nearest(_right->eval_number(c, stack));
+
+      if (is_nan(first)) return xpath_string(); // NaN
+      else if (first >= s_length + 1) return xpath_string();
+
+      size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+      assert(1 <= pos && pos <= s_length + 1);
+
+      const char_t* rbegin = s.c_str() + (pos - 1);
+      const char_t* rend = s.c_str() + s.length();
+
+      return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
+    }
+
+    case ast_func_substring_3: {
+      xpath_allocator_capture cr(stack.temp);
+
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      xpath_string s = _left->eval_string(c, swapped_stack);
+      size_t s_length = s.length();
+
+      double first = round_nearest(_right->eval_number(c, stack));
+      double last = first + round_nearest(_right->_next->eval_number(c, stack));
+
+      if (is_nan(first) || is_nan(last)) return xpath_string();
+      else if (first >= s_length + 1) return xpath_string();
+      else if (first >= last) return xpath_string();
+      else if (last < 1) return xpath_string();
+
+      size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
+      size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
+
+      assert(1 <= pos && pos <= end && end <= s_length + 1);
+      const char_t* rbegin = s.c_str() + (pos - 1);
+      const char_t* rend = s.c_str() + (end - 1);
+
+      return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
+    }
+
+    case ast_func_normalize_space_0: {
+      xpath_string s = string_value(c.n, stack.result);
+
+      char_t* begin = s.data(stack.result);
+      char_t* end = normalize_space(begin);
+
+      return xpath_string::from_heap_preallocated(begin, end);
+    }
+
+    case ast_func_normalize_space_1: {
+      xpath_string s = _left->eval_string(c, stack);
+
+      char_t* begin = s.data(stack.result);
+      char_t* end = normalize_space(begin);
+
+      return xpath_string::from_heap_preallocated(begin, end);
+    }
+
+    case ast_func_translate: {
+      xpath_allocator_capture cr(stack.temp);
+
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      xpath_string s = _left->eval_string(c, stack);
+      xpath_string from = _right->eval_string(c, swapped_stack);
+      xpath_string to = _right->_next->eval_string(c, swapped_stack);
+
+      char_t* begin = s.data(stack.result);
+      char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
+
+      return xpath_string::from_heap_preallocated(begin, end);
+    }
+
+    case ast_opt_translate_table: {
+      xpath_string s = _left->eval_string(c, stack);
+
+      char_t* begin = s.data(stack.result);
+      char_t* end = translate_table(begin, _data.table);
+
+      return xpath_string::from_heap_preallocated(begin, end);
+    }
+
+    case ast_variable: {
+      assert(_rettype == _data.variable->type());
+
+      if (_rettype == xpath_type_string)
+        return xpath_string::from_const(_data.variable->get_string());
+
+      // fallthrough to type conversion
+    }
+
+    default: {
+      switch (_rettype) {
+      case xpath_type_boolean:
+        return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
+
+      case xpath_type_number:
+        return convert_number_to_string(eval_number(c, stack), stack.result);
+
+      case xpath_type_node_set: {
+        xpath_allocator_capture cr(stack.temp);
+
+        xpath_stack swapped_stack = {stack.temp, stack.result};
+
+        xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
+        return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
+      }
+
+      default:
+        assert(!"Wrong expression for return type string");
+        return xpath_string();
+      }
+    }
+    }
+  }
+
+  // Evaluates this AST node as a node set and returns the raw result.
+  // Dispatches on _type; node types not handled below reach the default
+  // branch, which asserts and returns an empty set.
+  //   c     - evaluation context (c.n is the context node)
+  //   stack - allocator pair; the sets built directly in this function are
+  //           allocated from stack.result
+  //   eval  - evaluation mode, forwarded to sub-expressions and step_do
+  xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) {
+    switch (_type) {
+    case ast_op_union: {
+      // NOTE(review): cr presumably releases stack.temp allocations made in this scope - confirm
+      xpath_allocator_capture cr(stack.temp);
+
+      // same two allocators as `stack`, listed in the opposite order
+      xpath_stack swapped_stack = {stack.temp, stack.result};
+
+      // left operand is evaluated with the swapped stack, right operand with the caller's stack
+      xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
+      xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
+
+      // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
+      rs.set_type(xpath_node_set::type_unsorted);
+
+      // merge the left operand into the right one, then drop nodes present in both
+      rs.append(ls.begin(), ls.end(), stack.result);
+      rs.remove_duplicates();
+
+      return rs;
+    }
+
+    case ast_filter: {
+      // when the predicate was classified as the constant 1, ask the operand for its first node only
+      xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
+
+      // either expression is a number or it contains position() call; sort by document order
+      if (_test != predicate_posinv) set.sort_do();
+
+      bool once = eval_once(set.type(), eval);
+
+      apply_predicate(set, 0, stack, once);
+
+      return set;
+    }
+
+    case ast_func_id:
+      // id() always yields an empty node set here
+      return xpath_node_set_raw();
+
+    case ast_step: {
+      // the runtime axis value selects a step_do call tagged with axis_to_type<axis>
+      switch (_axis) {
+      case axis_ancestor:
+        return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
+
+      case axis_ancestor_or_self:
+        return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
+
+      case axis_attribute:
+        return step_do(c, stack, eval, axis_to_type<axis_attribute>());
+
+      case axis_child:
+        return step_do(c, stack, eval, axis_to_type<axis_child>());
+
+      case axis_descendant:
+        return step_do(c, stack, eval, axis_to_type<axis_descendant>());
+
+      case axis_descendant_or_self:
+        return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
+
+      case axis_following:
+        return step_do(c, stack, eval, axis_to_type<axis_following>());
+
+      case axis_following_sibling:
+        return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
+
+      case axis_namespace:
+        // namespaced axis is not supported
+        return xpath_node_set_raw();
+
+      case axis_parent:
+        return step_do(c, stack, eval, axis_to_type<axis_parent>());
+
+      case axis_preceding:
+        return step_do(c, stack, eval, axis_to_type<axis_preceding>());
+
+      case axis_preceding_sibling:
+        return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
+
+      case axis_self:
+        return step_do(c, stack, eval, axis_to_type<axis_self>());
+
+      default:
+        assert(!"Unknown axis");
+        return xpath_node_set_raw();
+      }
+    }
+
+    case ast_step_root: {
+      assert(!_right); // root step can't have any predicates
+
+      xpath_node_set_raw ns;
+
+      ns.set_type(xpath_node_set::type_sorted);
+
+      // context is a node: take its root; context is an attribute: take the root of its parent;
+      // otherwise the set stays empty
+      if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
+      else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
+
+      return ns;
+    }
+
+    case ast_variable: {
+      assert(_rettype == _data.variable->type());
+
+      if (_rettype == xpath_type_node_set) {
+        const xpath_node_set& s = _data.variable->get_node_set();
+
+        // copy the variable's nodes (and its ordering type) into a fresh raw set
+        xpath_node_set_raw ns;
+
+        ns.set_type(s.type());
+        ns.append(s.begin(), s.end(), stack.result);
+
+        return ns;
+      }
+
+      // fallthrough to type conversion
+    }
+
+    default:
+      assert(!"Wrong expression for return type node set");
+      return xpath_node_set_raw();
+    }
+  }
+
+  // Optimizes the whole subtree rooted at this node: the left child, the
+  // right child and the next sibling are processed first (in that order),
+  // then the node itself via optimize_self.
+  void optimize(xpath_allocator* alloc) {
+    xpath_ast_node* const linked[3] = {_left, _right, _next};
+
+    for (size_t i = 0; i < 3; ++i) {
+      if (linked[i]) linked[i]->optimize(alloc);
+    }
+
+    optimize_self(alloc);
+  }
+
+  // Applies local rewrites and classifications to this node only; optimize()
+  // calls it after the linked nodes (_left, _right, _next) have been processed.
+  // The steps below run in sequence and later steps see the effects of earlier
+  // ones (e.g. classification sees the rewritten predicate).
+  //   alloc - allocator used to build the translate() lookup table
+  // NOTE(review): filter/predicate/translate/equal nodes are assumed to have
+  // non-null _right (and _left where dereferenced) - no null checks are made here.
+  void optimize_self(xpath_allocator* alloc) {
+    // Rewrite [position()=expr] with [expr]
+    // Note that this step has to go before classification to recognize [position()=1]
+    if ((_type == ast_filter || _type == ast_predicate) &&
+        _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) {
+      _right = _right->_right;
+    }
+
+    // Classify filter/predicate ops to perform various optimizations during evaluation
+    if (_type == ast_filter || _type == ast_predicate) {
+      assert(_test == predicate_default);
+
+      // literal 1 -> constant_one; other numeric constant/variable/last() -> constant;
+      // non-numeric expression that passes is_posinv_expr() -> posinv; anything else stays default
+      if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
+        _test = predicate_constant_one;
+      else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
+        _test = predicate_constant;
+      else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
+        _test = predicate_posinv;
+    }
+
+    // Rewrite descendant-or-self::node()/child::foo with descendant::foo
+    // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
+    // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
+    // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
+    if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
+        _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
+        is_posinv_step()) {
+      // child/descendant collapse to descendant; self/descendant-or-self collapse to descendant-or-self
+      if (_axis == axis_child || _axis == axis_descendant)
+        _axis = axis_descendant;
+      else
+        _axis = axis_descendant_or_self;
+
+      // drop the intermediate descendant-or-self::node() step from the chain
+      _left = _left->_left;
+    }
+
+    // Use optimized lookup table implementation for translate() with constant arguments
+    if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) {
+      unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
+
+      // a null table leaves the node as a regular translate() call
+      if (table) {
+        _type = ast_opt_translate_table;
+        _data.table = table;
+      }
+    }
+
+    // Use optimized path for @attr = 'value' or @attr = $value
+    if (_type == ast_op_equal &&
+        _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
+        (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) {
+      _type = ast_opt_compare_attribute;
+    }
+  }
+
+  // Returns false if this expression is position() or last(), or if any of
+  // its operands (left child and the right child chain) is; returns true
+  // otherwise. Constants, variables, steps, predicates and filters are
+  // treated as leaves and reported as true without inspecting their children.
+  bool is_posinv_expr() const {
+    // the two functions that read the context position/size
+    if (_type == ast_func_position || _type == ast_func_last) return false;
+
+    // leaves of this check
+    const bool is_leaf =
+        _type == ast_string_constant || _type == ast_number_constant || _type == ast_variable ||
+        _type == ast_step || _type == ast_step_root ||
+        _type == ast_predicate || _type == ast_filter;
+
+    if (is_leaf) return true;
+
+    // everything else: true only when every operand is
+    if (_left && !_left->is_posinv_expr()) return false;
+
+    xpath_ast_node* operand = _right;
+
+    while (operand) {
+      if (!operand->is_posinv_expr()) return false;
+
+      operand = operand->_next;
+    }
+
+    return true;
+  }
+
+  // For a step node: returns true when every predicate attached to the step
+  // (the chain starting at _right) has been classified as predicate_posinv;
+  // a step without predicates also returns true.
+  bool is_posinv_step() const {
+    assert(_type == ast_step);
+
+    xpath_ast_node* pred = _right;
+
+    while (pred) {
+      assert(pred->_type == ast_predicate);
+
+      if (pred->_test != predicate_posinv) return false;
+
+      pred = pred->_next;
+    }
+
+    return true;
+  }
+
+  // Returns the stored _rettype converted to xpath_value_type.
+  xpath_value_type rettype() const {
+    const xpath_value_type value_type = static_cast<xpath_value_type>(_rettype);
+
+    return value_type;
+  }
+};
+
+struct xpath_parser {
+  xpath_allocator* _alloc;
+  xpath_lexer _lexer;
+
+  const char_t* _query;
+  xpath_variable_set* _variables;
+
+  xpath_parse_result* _result;
+
+  char_t _scratch[32];
+
+#ifdef PUGIXML_NO_EXCEPTIONS
+  jmp_buf _error_handler;
+#endif
+
+  // Stores `message` and the current lexer position (as an offset from the
+  // start of the query) in the parse result, then aborts parsing: throws
+  // xpath_exception, or longjmps to _error_handler when PUGIXML_NO_EXCEPTIONS
+  // is defined. Does not return to the caller.
+  void throw_error(const char* message) {
+    _result->offset = _lexer.current_pos() - _query;
+    _result->error = message;
+
+#ifndef PUGIXML_NO_EXCEPTIONS
+    throw xpath_exception(*_result);
+#else
+    longjmp(_error_handler, 1);
+#endif
+  }
+
+  // Reports an allocation failure: throws std::bad_alloc, or goes through
+  // throw_error("Out of memory") when PUGIXML_NO_EXCEPTIONS is defined.
+  void throw_error_oom() {
+#ifndef PUGIXML_NO_EXCEPTIONS
+    throw std::bad_alloc();
+#else
+    throw_error("Out of memory");
+#endif
+  }
+
+  // Returns storage for one xpath_ast_node obtained from _alloc; raises the
+  // out-of-memory error when the allocator returns null.
+  void* alloc_node() {
+    void* const memory = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
+
+    if (memory == 0) throw_error_oom();
+
+    return memory;
+  }
+
+  // Copies the lexer string [value.begin, value.end) into a null-terminated
+  // buffer obtained from _alloc and returns it. Returns 0 when value.begin is
+  // null; raises the out-of-memory error when allocation fails.
+  const char_t* alloc_string(const xpath_lexer_string& value) {
+    if (!value.begin) return 0;
+
+    const size_t length = static_cast<size_t>(value.end - value.begin);
+
+    // one extra character for the terminator
+    void* const memory = _alloc->allocate_nothrow((length + 1) * sizeof(char_t));
+    char_t* copy = static_cast<char_t*>(memory);
+
+    if (!copy) throw_error_oom();
+    assert(copy); // workaround for clang static analysis
+
+    memcpy(copy, value.begin, length * sizeof(char_t));
+    copy[length] = 0;
+
+    return copy;
+  }
+
+  // Builds a string-returning function node that takes at most one argument:
+  // type0 is used when argc is 0, type1 otherwise. With one argument, the
+  // argument must be a node set, else a parse error is raised.
+  xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) {
+    assert(argc <= 1);
+
+    if (argc == 1) {
+      if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+    }
+
+    const ast_type_t chosen = (argc != 0) ? type1 : type0;
+
+    return new (alloc_node()) xpath_ast_node(chosen, xpath_type_string, args[0]);
+  }
+
+  // Builds the AST node for a call to the function `name` with `argc`
+  // arguments. args[0] and args[1] are the first two parsed arguments (null
+  // when absent). Dispatch is on the first character of the name, then on the
+  // exact name together with the accepted argument count.
+  // Raises a parse error when no name/arity pair matches, or when a function
+  // that needs a node set argument (count, sum, and the parse_function_helper
+  // users) gets something else.
+  xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) {
+    switch (name.begin[0]) {
+    case 'b':
+      if (name == PUGIXML_TEXT("boolean") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
+
+      break;
+
+    case 'c':
+      if (name == PUGIXML_TEXT("count") && argc == 1) {
+        if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+        return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
+      } else if (name == PUGIXML_TEXT("contains") && argc == 2)
+        return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("concat") && argc >= 2)
+        // only the first two arguments are passed here
+        // NOTE(review): further arguments are presumably reachable through the _next chain of args[1] - confirm
+        return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
+
+      break;
+
+    case 'f':
+      if (name == PUGIXML_TEXT("false") && argc == 0)
+        return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
+      else if (name == PUGIXML_TEXT("floor") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
+
+      break;
+
+    case 'i':
+      if (name == PUGIXML_TEXT("id") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
+
+      break;
+
+    case 'l':
+      if (name == PUGIXML_TEXT("last") && argc == 0)
+        return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
+      else if (name == PUGIXML_TEXT("lang") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
+      else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
+        return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
+
+      break;
+
+    case 'n':
+      if (name == PUGIXML_TEXT("name") && argc <= 1)
+        return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
+      else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
+        return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
+      else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
+        // argc <= 1 here, so args[1] is the caller's unset (null) slot
+        return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("not") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
+      else if (name == PUGIXML_TEXT("number") && argc <= 1)
+        return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
+
+      break;
+
+    case 'p':
+      if (name == PUGIXML_TEXT("position") && argc == 0)
+        return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
+
+      break;
+
+    case 'r':
+      if (name == PUGIXML_TEXT("round") && argc == 1)
+        return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
+
+      break;
+
+    case 's':
+      if (name == PUGIXML_TEXT("string") && argc <= 1)
+        return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
+      else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
+        return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
+      else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
+        return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
+        return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
+        return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
+        // the evaluator reads the optional third argument through args[1]'s _next link
+        return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("sum") && argc == 1) {
+        if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
+        return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
+      }
+
+      break;
+
+    case 't':
+      if (name == PUGIXML_TEXT("translate") && argc == 3)
+        // the evaluator reads the third argument through args[1]'s _next link
+        return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
+      else if (name == PUGIXML_TEXT("true") && argc == 0)
+        return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
+
+      break;
+
+    default:
+      break;
+    }
+
+    throw_error("Unrecognized function or wrong parameter count");
+
+    // not reached: throw_error throws or longjmps
+    return 0;
+  }
+
+  // Translates an axis name into its axis_t value.
+  // For a recognized name `specified` is set to true; for any other name it
+  // is set to false and axis_child is returned as a placeholder.
+  axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) {
+    struct axis_entry {
+      const char_t* text;
+      axis_t axis;
+    };
+
+    static const axis_entry known_axes[] = {
+      {PUGIXML_TEXT("ancestor"), axis_ancestor},
+      {PUGIXML_TEXT("ancestor-or-self"), axis_ancestor_or_self},
+      {PUGIXML_TEXT("attribute"), axis_attribute},
+      {PUGIXML_TEXT("child"), axis_child},
+      {PUGIXML_TEXT("descendant"), axis_descendant},
+      {PUGIXML_TEXT("descendant-or-self"), axis_descendant_or_self},
+      {PUGIXML_TEXT("following"), axis_following},
+      {PUGIXML_TEXT("following-sibling"), axis_following_sibling},
+      {PUGIXML_TEXT("namespace"), axis_namespace},
+      {PUGIXML_TEXT("parent"), axis_parent},
+      {PUGIXML_TEXT("preceding"), axis_preceding},
+      {PUGIXML_TEXT("preceding-sibling"), axis_preceding_sibling},
+      {PUGIXML_TEXT("self"), axis_self}
+    };
+
+    const char_t initial = name.begin[0];
+
+    for (size_t i = 0; i < sizeof(known_axes) / sizeof(known_axes[0]); ++i) {
+      const axis_entry& candidate = known_axes[i];
+
+      // compare the first character before the full name
+      if (candidate.text[0] == initial && name == candidate.text) {
+        specified = true;
+        return candidate.axis;
+      }
+    }
+
+    specified = false;
+    return axis_child;
+  }
+
+  // Maps a node type name ("comment", "node", "processing-instruction",
+  // "text") to its nodetest_t value; returns nodetest_none for any other name.
+  nodetest_t parse_node_test_type(const xpath_lexer_string& name) {
+    const char_t initial = name.begin[0];
+
+    // compare the first character before the full name
+    if (initial == 'c' && name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
+
+    if (initial == 'n' && name == PUGIXML_TEXT("node")) return nodetest_type_node;
+
+    if (initial == 'p' && name == PUGIXML_TEXT("processing-instruction")) return nodetest_type_pi;
+
+    if (initial == 't' && name == PUGIXML_TEXT("text")) return nodetest_type_text;
+
+    return nodetest_none;
+  }
+
+  // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+  // Parses one primary expression starting at the lexer's current token and
+  // returns its AST node. Handles variable references, parenthesized
+  // expressions, string literals, numbers and function calls; any other token
+  // raises a parse error.
+  xpath_ast_node* parse_primary_expression() {
+    switch (_lexer.current()) {
+    case lex_var_ref: {
+      xpath_lexer_string name = _lexer.contents();
+
+      if (!_variables)
+        throw_error("Unknown variable: variable set is not provided");
+
+      // a false return is reported as out of memory; a null var after success means the name is unknown
+      xpath_variable* var = 0;
+      if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
+        throw_error_oom();
+
+      if (!var)
+        throw_error("Unknown variable: variable set does not contain the given name");
+
+      _lexer.next();
+
+      // the node's result type is the variable's type as seen at parse time
+      return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
+    }
+
+    case lex_open_brace: {
+      _lexer.next();
+
+      // the inner expression's node is returned as is; no wrapper node is created
+      xpath_ast_node* n = parse_expression();
+
+      if (_lexer.current() != lex_close_brace)
+        throw_error("Unmatched braces");
+
+      _lexer.next();
+
+      return n;
+    }
+
+    case lex_quoted_string: {
+      // copy the literal before advancing the lexer
+      const char_t* value = alloc_string(_lexer.contents());
+
+      xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
+      _lexer.next();
+
+      return n;
+    }
+
+    case lex_number: {
+      double value = 0;
+
+      // a false return from the conversion is reported as out of memory
+      if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
+        throw_error_oom();
+
+      xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
+      _lexer.next();
+
+      return n;
+    }
+
+    case lex_string: {
+      // function call: name '(' [ Expr { ',' Expr } ] ')'
+      xpath_ast_node* args[2] = {0};
+      size_t argc = 0;
+
+      xpath_lexer_string function = _lexer.contents();
+      _lexer.next();
+
+      xpath_ast_node* last_arg = 0;
+
+      if (_lexer.current() != lex_open_brace)
+        throw_error("Unrecognized function call");
+      _lexer.next();
+
+      // first argument, if the list is not empty
+      if (_lexer.current() != lex_close_brace)
+        args[argc++] = parse_expression();
+
+      while (_lexer.current() != lex_close_brace) {
+        if (_lexer.current() != lex_comma)
+          throw_error("No comma between function arguments");
+        _lexer.next();
+
+        xpath_ast_node* n = parse_expression();
+
+        // the second argument goes into args[1]; each later one is linked
+        // after the previous argument with set_next
+        if (argc < 2) args[argc] = n;
+        else last_arg->set_next(n);
+
+        argc++;
+        last_arg = n;
+      }
+
+      _lexer.next();
+
+      return parse_function(function, argc, args);
+    }
+
+    default:
+      throw_error("Unrecognizable primary expression");
+
+      // not reached: throw_error throws or longjmps
+      return 0;
+    }
+  }
+
+  // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
+  // Predicate ::= '[' PredicateExpr ']'
+  // PredicateExpr ::= Expr
+  // Parses a primary expression followed by any number of '[' Expr ']'
+  // predicates. Each predicate wraps the expression parsed so far in an
+  // ast_filter node; the wrapped expression must be a node set and the
+  // predicate must be closed by ']', otherwise a parse error is raised.
+  xpath_ast_node* parse_filter_expression() {
+    xpath_ast_node* filtered = parse_primary_expression();
+
+    for (;;) {
+      if (_lexer.current() != lex_open_square_brace) break;
+
+      _lexer.next();
+
+      xpath_ast_node* predicate = parse_expression();
+
+      // the type check happens after the predicate body has been parsed
+      if (filtered->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
+
+      filtered = new (alloc_node()) xpath_ast_node(ast_filter, filtered, predicate, predicate_default);
+
+      if (_lexer.current() != lex_close_square_brace) throw_error("Unmatched square brace");
+
+      _lexer.next();
+    }
+
+    return filtered;
+  }
+
+  // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+  // AxisSpecifier ::= AxisName '::' | '@'?
+  // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
+  // NameTest ::= '*' | NCName ':' '*' | QName
+  // AbbreviatedStep ::= '.' | '..'
+  // Parses a single location step applied to `set` (may be null) and returns
+  // the resulting ast_step node, with any predicates linked to it.
+  // Raises a parse error when `set` is not a node set, on a duplicate or
+  // unknown axis, on a malformed node test, or on an unclosed predicate.
+  xpath_ast_node* parse_step(xpath_ast_node* set) {
+    if (set && set->rettype() != xpath_type_node_set)
+      throw_error("Step has to be applied to node set");
+
+    bool axis_specified = false;
+    axis_t axis = axis_child; // implied child axis
+
+    if (_lexer.current() == lex_axis_attribute) {
+      axis = axis_attribute;
+      axis_specified = true;
+
+      _lexer.next();
+    } else if (_lexer.current() == lex_dot) {
+      _lexer.next();
+
+      // '.' is returned at once as self::node(); no predicates are parsed for it
+      return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
+    } else if (_lexer.current() == lex_double_dot) {
+      _lexer.next();
+
+      // '..' is returned at once as parent::node(); no predicates are parsed for it
+      return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
+    }
+
+    nodetest_t nt_type = nodetest_none;
+    xpath_lexer_string nt_name;
+
+    if (_lexer.current() == lex_string) {
+      // node name test
+      nt_name = _lexer.contents();
+      _lexer.next();
+
+      // was it an axis name?
+      if (_lexer.current() == lex_double_colon) {
+        // parse axis name
+        if (axis_specified) throw_error("Two axis specifiers in one step");
+
+        axis = parse_axis_name(nt_name, axis_specified);
+
+        if (!axis_specified) throw_error("Unknown axis");
+
+        // read actual node test
+        _lexer.next();
+
+        if (_lexer.current() == lex_multiply) {
+          nt_type = nodetest_all;
+          nt_name = xpath_lexer_string();
+          _lexer.next();
+        } else if (_lexer.current() == lex_string) {
+          nt_name = _lexer.contents();
+          _lexer.next();
+        } else throw_error("Unrecognized node test");
+      }
+
+      // still undecided unless the test after '::' was '*'
+      if (nt_type == nodetest_none) {
+        // node type test or processing-instruction
+        if (_lexer.current() == lex_open_brace) {
+          _lexer.next();
+
+          if (_lexer.current() == lex_close_brace) {
+            _lexer.next();
+
+            // name() form: the name must be one of the known node types
+            nt_type = parse_node_test_type(nt_name);
+
+            if (nt_type == nodetest_none) throw_error("Unrecognized node type");
+
+            nt_name = xpath_lexer_string();
+          } else if (nt_name == PUGIXML_TEXT("processing-instruction")) {
+            if (_lexer.current() != lex_quoted_string)
+              throw_error("Only literals are allowed as arguments to processing-instruction()");
+
+            // the literal becomes the name stored with the nodetest_pi test
+            nt_type = nodetest_pi;
+            nt_name = _lexer.contents();
+            _lexer.next();
+
+            if (_lexer.current() != lex_close_brace)
+              throw_error("Unmatched brace near processing-instruction()");
+            _lexer.next();
+          } else
+            throw_error("Unmatched brace near node type test");
+
+        }
+        // QName or NCName:*
+        else {
+          if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') { // NCName:*
+            nt_name.end--; // erase *
+
+            nt_type = nodetest_all_in_namespace;
+          } else nt_type = nodetest_name;
+        }
+      }
+    } else if (_lexer.current() == lex_multiply) {
+      nt_type = nodetest_all;
+      _lexer.next();
+    } else throw_error("Unrecognized node test");
+
+    // the name is copied with alloc_string; a default-constructed nt_name is passed for tests that carry no name
+    xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
+
+    // tail of the predicate chain built so far
+    xpath_ast_node* last = 0;
+
+    while (_lexer.current() == lex_open_square_brace) {
+      _lexer.next();
+
+      xpath_ast_node* expr = parse_expression();
+
+      xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);
+
+      if (_lexer.current() != lex_close_square_brace)
+        throw_error("Unmatched square brace");
+      _lexer.next();
+
+      // the first predicate is attached with set_right, later ones are linked with set_next
+      if (last) last->set_next(pred);
+      else n->set_right(pred);
+
+      last = pred;
+    }
+
+    return n;
+  }
+
+  // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
+  // Parses one step applied to `set`, then further steps for as long as the
+  // next token is '/' or '//'. Each step is applied to the path built so far;
+  // for '//' a descendant-or-self::node() step is inserted first.
+  xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) {
+    xpath_ast_node* path = parse_step(set);
+
+    for (lexeme_t separator = _lexer.current();
+         separator == lex_slash || separator == lex_double_slash;
+         separator = _lexer.current()) {
+      _lexer.next();
+
+      if (separator == lex_double_slash) {
+        path = new (alloc_node()) xpath_ast_node(ast_step, path, axis_descendant_or_self, nodetest_type_node, 0);
+      }
+
+      path = parse_step(path);
+    }
+
+    return path;
+  }
+
+  // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+  // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
+  // Parses a location path.
+  //   '//' ...  -> root step, descendant-or-self::node() step, then a relative path
+  //   '/' ...   -> root step, followed by a relative path only when the next
+  //                token can start one; otherwise the root step alone
+  //   otherwise -> a relative path with no starting set
+  xpath_ast_node* parse_location_path() {
+    const lexeme_t lead = _lexer.current();
+
+    if (lead == lex_double_slash) {
+      _lexer.next();
+
+      xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+      xpath_ast_node* descend = new (alloc_node()) xpath_ast_node(ast_step, root, axis_descendant_or_self, nodetest_type_node, 0);
+
+      return parse_relative_location_path(descend);
+    }
+
+    if (lead == lex_slash) {
+      _lexer.next();
+
+      xpath_ast_node* root = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
+
+      // tokens that can begin a relative location path; anything else means a standalone '/'
+      switch (_lexer.current()) {
+      case lex_string:
+      case lex_axis_attribute:
+      case lex_dot:
+      case lex_double_dot:
+      case lex_multiply:
+        return parse_relative_location_path(root);
+
+      default:
+        return root;
+      }
+    }
+
+    // kept outside of the conditionals so that every control path visibly ends in a return
+    return parse_relative_location_path(0);
+  }
+
+  // PathExpr ::= LocationPath
+  //				| FilterExpr
+  //				| FilterExpr '/' RelativeLocationPath
+  //				| FilterExpr '//' RelativeLocationPath
+  // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+  // UnaryExpr ::= UnionExpr | '-' UnaryExpr
+  // Parses a path expression or a unary minus expression and returns its AST
+  // node. Based on the current token it chooses between a filter expression
+  // (optionally followed by '/' or '//' and a relative location path), a
+  // negation, or a plain location path.
+  xpath_ast_node* parse_path_or_unary_expression() {
+    // Clarification.
+    // PathExpr begins with either LocationPath or FilterExpr.
+    // FilterExpr begins with PrimaryExpr
+    // PrimaryExpr begins with '$' in case of it being a variable reference,
+    // '(' in case of it being an expression, string literal, number constant or
+    // function call.
+
+    if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
+        _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
+        _lexer.current() == lex_string) {
+      if (_lexer.current() == lex_string) {
+        // This is either a function call, or not - if not, we shall proceed with location path
+        // peek at the characters that follow, skipping whitespace, without advancing the lexer
+        const char_t* state = _lexer.state();
+
+        while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
+
+        if (*state != '(') return parse_location_path();
+
+        // This looks like a function call; however this still can be a node-test. Check it.
+        if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
+      }
+
+      xpath_ast_node* n = parse_filter_expression();
+
+      if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) {
+        lexeme_t l = _lexer.current();
+        _lexer.next();
+
+        if (l == lex_double_slash) {
+          if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
+
+          // '//' inserts a descendant-or-self::node() step before the relative path
+          n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
+        }
+
+        // select from location path
+        return parse_relative_location_path(n);
+      }
+
+      return n;
+    } else if (_lexer.current() == lex_minus) {
+      _lexer.next();
+
+      // precedence 7+ - only parses union expressions
+      xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
+
+      return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
+    } else
+      return parse_location_path();
+  }
+
+  // Describes a binary operator recognized by the expression parser:
+  // the AST node type to create, the value type of the result, and the
+  // operator's precedence (1 for "or" up to 7 for union, per the table in parse()).
+  struct binary_op_t {
+    ast_type_t asttype;       // ast_unknown means "not a binary operator"
+    xpath_value_type rettype; // value type produced by the operator
+    int precedence;           // 0 for the "no operator" value
+
+    // Constructs the "no operator" value.
+    binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) {
+    }
+
+    // Constructs a descriptor from its three components.
+    binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) {
+    }
+
+    // Maps the lexer's current token to an operator descriptor. Only
+    // lexer.current() and lexer.contents() are called; the token is not
+    // advanced here. Returns the "no operator" value for any token that is
+    // not a binary operator.
+    static binary_op_t parse(xpath_lexer& lexer) {
+      switch (lexer.current()) {
+      case lex_string:
+        // word operators arrive as plain name tokens
+        if (lexer.contents() == PUGIXML_TEXT("or"))
+          return binary_op_t(ast_op_or, xpath_type_boolean, 1);
+        else if (lexer.contents() == PUGIXML_TEXT("and"))
+          return binary_op_t(ast_op_and, xpath_type_boolean, 2);
+        else if (lexer.contents() == PUGIXML_TEXT("div"))
+          return binary_op_t(ast_op_divide, xpath_type_number, 6);
+        else if (lexer.contents() == PUGIXML_TEXT("mod"))
+          return binary_op_t(ast_op_mod, xpath_type_number, 6);
+        else
+          return binary_op_t();
+
+      case lex_equal:
+        return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
+
+      case lex_not_equal:
+        return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
+
+      case lex_less:
+        return binary_op_t(ast_op_less, xpath_type_boolean, 4);
+
+      case lex_greater:
+        return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
+
+      case lex_less_or_equal:
+        return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
+
+      case lex_greater_or_equal:
+        return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
+
+      case lex_plus:
+        return binary_op_t(ast_op_add, xpath_type_number, 5);
+
+      case lex_minus:
+        return binary_op_t(ast_op_subtract, xpath_type_number, 5);
+
+      case lex_multiply:
+        return binary_op_t(ast_op_multiply, xpath_type_number, 6);
+
+      case lex_union:
+        return binary_op_t(ast_op_union, xpath_type_node_set, 7);
+
+      default:
+        return binary_op_t();
+      }
+    }
+  };
+
+  xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) {
+    // Folds "lhs op operand op operand ..." into a left-leaning tree, consuming
+    // operators whose precedence is at least `limit`. An operator that follows an
+    // operand and binds tighter than the current one absorbs that operand first.
+    for (binary_op_t cur = binary_op_t::parse(_lexer); cur.asttype != ast_unknown && cur.precedence >= limit; cur = binary_op_t::parse(_lexer)) {
+      // step over the operator lexeme itself
+      _lexer.next();
+
+      xpath_ast_node* operand = parse_path_or_unary_expression();
+
+      // tighter-binding operators after the operand claim it through recursion
+      for (binary_op_t ahead = binary_op_t::parse(_lexer); ahead.asttype != ast_unknown && ahead.precedence > cur.precedence; ahead = binary_op_t::parse(_lexer))
+        operand = parse_expression_rec(operand, ahead.precedence);
+
+      if (cur.asttype == ast_op_union) {
+        bool both_node_sets = lhs->rettype() == xpath_type_node_set && operand->rettype() == xpath_type_node_set;
+
+        if (!both_node_sets) throw_error("Union operator has to be applied to node sets");
+      }
+
+      // combine, then let the loop header look at whatever operator comes next
+      lhs = new (alloc_node()) xpath_ast_node(cur.asttype, cur.rettype, lhs, operand);
+    }
+
+    return lhs;
+  }
+
+  // Expr ::= OrExpr
+  // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+  // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+  // EqualityExpr ::= RelationalExpr
+  //					| EqualityExpr '=' RelationalExpr
+  //					| EqualityExpr '!=' RelationalExpr
+  // RelationalExpr ::= AdditiveExpr
+  //					  | RelationalExpr '<' AdditiveExpr
+  //					  | RelationalExpr '>' AdditiveExpr
+  //					  | RelationalExpr '<=' AdditiveExpr
+  //					  | RelationalExpr '>=' AdditiveExpr
+  // AdditiveExpr ::= MultiplicativeExpr
+  //					| AdditiveExpr '+' MultiplicativeExpr
+  //					| AdditiveExpr '-' MultiplicativeExpr
+  // MultiplicativeExpr ::= UnaryExpr
+  //						  | MultiplicativeExpr '*' UnaryExpr
+  //						  | MultiplicativeExpr 'div' UnaryExpr
+  //						  | MultiplicativeExpr 'mod' UnaryExpr
+  xpath_ast_node* parse_expression() {
+    return parse_expression_rec(parse_path_or_unary_expression(), 0); // limit 0: every binary operator (precedence >= 1) qualifies
+  }
+
+  xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) {
+  } // empty body: all work happens in the member initializers
+
+  xpath_ast_node* parse() {
+    // Entry point for a whole query: the text has to consist of exactly one
+    // expression.
+    xpath_ast_node* ast = parse_expression();
+
+    // lexemes left over after that expression mean the query is malformed
+    if (_lexer.current() != lex_eof) throw_error("Incorrect query");
+
+    return ast;
+  }
+
+  static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) {
+    xpath_parser parser(query, variables, alloc, result);
+    // NOTE(review): in exception-free builds throw_error() presumably longjmp()s back to the setjmp below - confirm
+#ifdef PUGIXML_NO_EXCEPTIONS
+    int error = setjmp(parser._error_handler);
+    // a non-zero setjmp result is the error path: the failure is reported as a null AST
+    return (error == 0) ? parser.parse() : 0;
+#else
+    return parser.parse();
+#endif
+  }
+};
+
+// Heap-allocated state behind a compiled query: the AST root plus the allocator
+// (and its embedded first block) that is handed to the parser.
+struct xpath_query_impl {
+  // Returns a freshly constructed object in xml_memory storage, or 0 on allocation failure.
+  static xpath_query_impl* create() {
+    void* storage = xml_memory::allocate(sizeof(xpath_query_impl));
+
+    return storage ? new (storage) xpath_query_impl() : 0;
+  }
+
+  // Counterpart of create(): frees all allocated pages, then the object itself (with the first page).
+  static void destroy(xpath_query_impl* impl) {
+    impl->alloc.release();
+    xml_memory::deallocate(impl);
+  }
+
+  xpath_query_impl(): root(0), alloc(&block) {
+    block.capacity = sizeof(block.data);
+    block.next = 0;
+  }
+
+  xpath_ast_node* root;
+  xpath_allocator alloc;
+  xpath_memory_block block;
+};
+
+PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
+{
+  if (!impl) return xpath_string(); // no compiled query: evaluate to a default-constructed string
+  // exception-free builds: a non-zero setjmp result is the error path and also yields a default-constructed string
+#ifdef PUGIXML_NO_EXCEPTIONS
+  if (setjmp(sd.error_handler)) return xpath_string();
+#endif
+  // n is the context node; the two 1s are presumably context position and size - TODO confirm
+  xpath_context c(n, 1, 1);
+
+  return impl->root->eval_string(c, sd.stack);
+}
+
+PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
+{
+  // no compiled query at all
+  if (!impl) return 0;
+
+  // the usual case: the expression yields a node set, so hand back its root
+  if (impl->root->rettype() == xpath_type_node_set) return impl->root;
+
+  // wrong result type: null in exception-free builds, xpath_exception otherwise
+#ifdef PUGIXML_NO_EXCEPTIONS
+  return 0;
+#else
+  xpath_parse_result failure;
+  failure.error = "Expression does not evaluate to node set";
+  throw xpath_exception(failure);
+#endif
+}
+PUGI__NS_END
+
+namespace pugi
+{
+#ifndef PUGIXML_NO_EXCEPTIONS
+PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
+{
+  assert(_result.error); // what() hands out this pointer, so a result without an error message is rejected
+}
+
+PUGI__FN const char* xpath_exception::what() const throw()
+{
+  return _result.error; // error text of the parse result stored at construction
+}
+
+PUGI__FN const xpath_parse_result& xpath_exception::result() const
+{
+  return _result; // the complete parse result this exception was constructed from
+}
+#endif
+
+PUGI__FN xpath_node::xpath_node()
+{
+} // _node and _attribute are both default-constructed
+
+PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
+{
+} // wraps a node; _attribute stays default-constructed
+
+PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
+{
+} // the parent is stored in _node only when attribute_ tests true; otherwise _node is a default xml_node
+
+PUGI__FN xml_node xpath_node::node() const
+{
+  return _attribute ? xml_node() : _node; // when an attribute is held, _node is its parent and is not reported here
+}
+
+PUGI__FN xml_attribute xpath_node::attribute() const
+{
+  return _attribute; // the stored attribute handle, exactly as passed to (or defaulted by) the constructor
+}
+
+PUGI__FN xml_node xpath_node::parent() const
+{
+  return _attribute ? _node : _node.parent(); // for an attribute, _node already holds the parent given at construction
+}
+
+PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
+{
+} // intentionally empty: its address serves as the "true" value of xpath_node's bool-like conversion
+
+PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
+{
+  return (_node || _attribute) ? unspecified_bool_xpath_node : 0; // non-null pointer when either handle tests true
+}
+
+PUGI__FN bool xpath_node::operator!() const
+{
+  return !(_node || _attribute); // true only when neither a node nor an attribute is held
+}
+
+PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
+{
+  return _node == n._node && _attribute == n._attribute; // both handles have to match
+}
+
+PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
+{
+  return _node != n._node || _attribute != n._attribute; // a difference in either handle is enough
+}
+
+#ifdef __BORLANDC__
+PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
+{
+  return (bool)lhs && rhs; // Borland-only overload: the explicit cast forces lhs to bool before combining
+}
+
+PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
+{
+  return (bool)lhs || rhs; // Borland-only overload: the explicit cast forces lhs to bool before combining
+}
+#endif
+
+PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
+{
+  // Replaces the contents of this set with a copy of [begin_, end_) and records
+  // type_ as the set's ordering type.
+  assert(begin_ <= end_);
+
+  size_t count = static_cast<size_t>(end_ - begin_);
+
+  // Zero or one node lives in the embedded _storage slot; anything larger gets
+  // its own heap buffer, which is filled before the current contents are touched
+  // so that an allocation failure leaves this set unchanged.
+  xpath_node* target = &_storage;
+
+  if (count > 1) {
+    target = static_cast<xpath_node*>(impl::xml_memory::allocate(count * sizeof(xpath_node)));
+
+    if (!target) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+      return;
+#else
+      throw std::bad_alloc();
+#endif
+    }
+
+    // nodes are copied bytewise into the new buffer
+    memcpy(target, begin_, count * sizeof(xpath_node));
+  }
+
+  // release the previous heap buffer, if there was one
+  if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+  // a single node is copied into the embedded slot
+  if (count == 1) _storage = *begin_;
+
+  // publish the new range
+  _begin = target;
+  _end = target + count;
+  _type = type_;
+}
+
+#if __cplusplus >= 201103
+PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
+{
+  // take over rhs' contents: the embedded slot is copied, a heap buffer simply changes owner
+  _storage = rhs._storage;
+  _type = rhs._type;
+  _begin = (rhs._begin != &rhs._storage) ? rhs._begin : &_storage;
+  _end = _begin + (rhs._end - rhs._begin);
+  // leave rhs empty and pointing at its own embedded slot
+  rhs._end = rhs._begin = &rhs._storage;
+  rhs._type = type_unsorted;
+}
+#endif
+
+PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+{
+} // empty set: both iterators point at the embedded _storage slot
+
+PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+{
+  _assign(begin_, end_, type_); // starts from the empty state set up above, then copies [begin_, end_)
+}
+
+PUGI__FN xpath_node_set::~xpath_node_set()
+{
+  // only a separately allocated buffer needs freeing; _storage is part of the object
+  if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+}
+
+PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+{
+  _assign(ns._begin, ns._end, ns._type); // starts from the empty state set up above, then copies ns' nodes and type
+}
+
+PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
+{
+  // self-assignment is a no-op; otherwise this set becomes a copy of ns'
+  // nodes and ordering type
+  if (this != &ns) _assign(ns._begin, ns._end, ns._type);
+
+  return *this;
+}
+
+#if __cplusplus >= 201103
+PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
+{
+  _move(rhs); // nothing of our own to free: the initializers above left this set empty
+}
+
+PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
+{
+  if (this != &rhs) {
+    // drop our own heap buffer first: _move() overwrites the pointers without freeing
+    if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
+
+    _move(rhs);
+  }
+
+  return *this;
+}
+#endif
+
+PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
+{
+  return _type; // ordering type as last recorded by _assign(), _move() or sort()
+}
+
+PUGI__FN size_t xpath_node_set::size() const
+{
+  return static_cast<size_t>(_end - _begin); // number of nodes in [_begin, _end)
+}
+
+PUGI__FN bool xpath_node_set::empty() const
+{
+  return _begin == _end; // an empty range means no nodes
+}
+
+PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
+{
+  assert(index < size()); // the bound is checked by this assertion only; there is no other range check
+  return _begin[index];
+}
+
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
+{
+  return _begin; // first node; equals end() for an empty set
+}
+
+PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
+{
+  return _end; // one past the last node
+}
+
+PUGI__FN void xpath_node_set::sort(bool reverse)
+{
+  _type = impl::xpath_sort(_begin, _end, _type, reverse); // the value returned by xpath_sort becomes the set's new type
+}
+
+PUGI__FN xpath_node xpath_node_set::first() const
+{
+  return impl::xpath_first(_begin, _end, _type); // delegates entirely to xpath_first; the set itself is not modified
+}
+
+PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
+{
+} // starts out as a failure ("Internal error"); it counts as success only once error is set to 0
+
+PUGI__FN xpath_parse_result::operator bool() const
+{
+  return error == 0; // success is signalled by a null error pointer
+}
+
+PUGI__FN const char* xpath_parse_result::description() const
+{
+  return error ? error : "No error"; // never returns null, unlike the raw error member
+}
+
+PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
+{
+} // a new variable is not linked into any chain yet (_next == 0)
+
+PUGI__FN const char_t* xpath_variable::name() const
+{
+  switch (_type) { // the name is a member of the concrete variable struct, so downcast according to _type
+  case xpath_type_node_set:
+    return static_cast<const impl::xpath_variable_node_set*>(this)->name;
+
+  case xpath_type_number:
+    return static_cast<const impl::xpath_variable_number*>(this)->name;
+
+  case xpath_type_string:
+    return static_cast<const impl::xpath_variable_string*>(this)->name;
+
+  case xpath_type_boolean:
+    return static_cast<const impl::xpath_variable_boolean*>(this)->name;
+
+  default:
+    assert(!"Invalid variable type");
+    return 0; // reached only for a type outside the four cases above, when the assertion is compiled out
+  }
+}
+
+PUGI__FN xpath_value_type xpath_variable::type() const
+{
+  return _type; // fixed at construction
+}
+
+PUGI__FN bool xpath_variable::get_boolean() const
+{
+  return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; // false for any non-boolean variable
+}
+
+PUGI__FN double xpath_variable::get_number() const
+{
+  return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); // gen_nan() result for any non-number variable
+}
+
+PUGI__FN const char_t* xpath_variable::get_string() const
+{
+  const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
+  return value ? value : PUGIXML_TEXT(""); // empty string both for non-string variables and for a null stored value
+}
+
+PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
+{
+  return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; // shared dummy set for any other type
+}
+
+PUGI__FN bool xpath_variable::set(bool value)
+{
+  // the value is stored only when this variable has boolean type
+  const bool type_matches = (_type == xpath_type_boolean);
+  if (type_matches) static_cast<impl::xpath_variable_boolean*>(this)->value = value;
+  return type_matches;
+}
+
+PUGI__FN bool xpath_variable::set(double value)
+{
+  // the value is stored only when this variable has number type
+  const bool type_matches = (_type == xpath_type_number);
+  if (type_matches) static_cast<impl::xpath_variable_number*>(this)->value = value;
+  return type_matches;
+}
+
+PUGI__FN bool xpath_variable::set(const char_t* value)
+{
+  if (_type != xpath_type_string) return false;
+
+  // make a private copy (the + 1 covers the terminator) before touching the
+  // stored value, so a failed allocation leaves the variable unchanged
+  size_t bytes = (impl::strlength(value) + 1) * sizeof(char_t);
+  char_t* duplicate = static_cast<char_t*>(impl::xml_memory::allocate(bytes));
+
+  if (!duplicate) return false;
+
+  memcpy(duplicate, value, bytes);
+
+  // free the string being replaced, if any, and store the copy
+  impl::xpath_variable_string* self = static_cast<impl::xpath_variable_string*>(this);
+  if (self->value) impl::xml_memory::deallocate(self->value);
+  self->value = duplicate;
+
+  return true;
+}
+
+PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
+{
+  // the set is copied in only when this variable has node-set type
+  const bool type_matches = (_type == xpath_type_node_set);
+  if (type_matches) static_cast<impl::xpath_variable_node_set*>(this)->value = value;
+  return type_matches;
+}
+
+PUGI__FN xpath_variable_set::xpath_variable_set()
+{
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
+    _data[i] = 0; // every hash bucket starts out as an empty chain
+}
+
+PUGI__FN xpath_variable_set::~xpath_variable_set()
+{
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
+    _destroy(_data[i]); // frees the whole chain hanging off bucket i (a null head is fine)
+}
+
+PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
+{
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
+    _data[i] = 0;
+  // the buckets must be zeroed first: _assign() swaps them into a temporary whose destructor then frees them
+  _assign(rhs);
+}
+
+PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
+{
+  // nothing to do for self-assignment; otherwise _assign() clones rhs into a
+  // temporary and swaps it in only if every clone succeeded
+  if (this != &rhs) _assign(rhs);
+
+  return *this;
+}
+
+#if __cplusplus >= 201103
+PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
+{
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
+    _data[i] = rhs._data[i]; // take over the chain of bucket i
+    rhs._data[i] = 0; // rhs keeps an empty bucket, so its destructor will not free what we now own
+  }
+}
+
+PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
+{
+  if (this == &rhs) return *this; // self-move guard: the loop below would otherwise destroy our own variables
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
+    _destroy(_data[i]); // release the chain we own before taking over rhs' chain
+    _data[i] = rhs._data[i];
+    rhs._data[i] = 0; // rhs must no longer reference the chain we now own
+  }
+
+  return *this;
+}
+#endif
+
+PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
+{
+  xpath_variable_set temp;
+  // clone into a temporary: if any clone fails we return with *this untouched and temp's destructor frees the partial copy
+  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
+    if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
+      return;
+  // success: trade contents; temp's destructor then releases the variables we held before
+  _swap(temp);
+}
+
+PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
+{
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+  for (size_t bucket = 0; bucket < bucket_count; ++bucket) { // exchange the chain heads bucket by bucket
+    xpath_variable* mine = _data[bucket];
+    _data[bucket] = rhs._data[bucket];
+    rhs._data[bucket] = mine;
+  }
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
+{
+  // pick the bucket from the name's hash, then walk its chain for an exact name match
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+  xpath_variable* candidate = _data[impl::hash_string(name) % bucket_count];
+
+  while (candidate && !impl::strequal(candidate->name(), name))
+    candidate = candidate->_next;
+
+  // null when the chain was exhausted without a match
+  return candidate;
+}
+
+PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
+{
+  // Deep-copies the chain starting at var into *out_result, preserving order.
+  // Returns false when a variable cannot be created or its value cannot be
+  // copied; *out_result may then hold a partially built chain.
+  // `link` always points at the pointer that has to receive the next clone: first
+  // the caller's result slot, afterwards the _next field of the latest clone.
+  xpath_variable** link = out_result;
+
+  for (; var; var = var->_next) {
+    // allocate storage for new variable
+    xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
+    if (!nvar) return false;
+
+    // attach the clone before copying its value: if the copy fails, the partial
+    // chain is still reachable through *out_result and can be destroyed from there
+    *link = nvar;
+    link = &nvar->_next;
+
+    // copy the value; this can fail due to out-of-memory conditions
+    if (!impl::copy_xpath_variable(nvar, var)) return false;
+  }
+
+  return true;
+}
+
+PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
+{
+  // Deletes every variable in the chain starting at var (a null var is a no-op).
+  xpath_variable* next = 0;
+  // the successor is read before the current variable is deleted
+  for (; var; var = next) {
+    next = var->_next;
+    impl::delete_xpath_variable(var->_type, var);
+  }
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
+{
+  const size_t bucket_count = sizeof(_data) / sizeof(_data[0]);
+  xpath_variable*& head = _data[impl::hash_string(name) % bucket_count];
+
+  // a variable with this name may already exist; it is reused only when its
+  // type agrees with the requested one, otherwise the request fails with 0
+  for (xpath_variable* existing = head; existing; existing = existing->_next) {
+    if (!impl::strequal(existing->name(), name)) continue;
+
+    return (existing->type() == type) ? existing : 0;
+  }
+
+  // not found: create it and push it onto the front of the bucket's chain
+  xpath_variable* created = impl::new_xpath_variable(type, name);
+  if (!created) return 0;
+
+  created->_next = head;
+  head = created;
+  return created;
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
+{
+  xpath_variable* slot = add(name, xpath_type_boolean); // 0 when the name exists with another type or no variable could be created
+  return slot && slot->set(value);
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
+{
+  xpath_variable* slot = add(name, xpath_type_number); // 0 when the name exists with another type or no variable could be created
+  return slot && slot->set(value);
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
+{
+  xpath_variable* slot = add(name, xpath_type_string); // 0 when the name exists with another type or no variable could be created
+  return slot && slot->set(value);
+}
+
+PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
+{
+  xpath_variable* slot = add(name, xpath_type_node_set); // 0 when the name exists with another type or no variable could be created
+  return slot && slot->set(value);
+}
+
+PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
+{
+  return _find(name); // 0 when no variable with this name exists; nothing is created
+}
+
+PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
+{
+  return _find(name); // const overload of the lookup above; 0 when the name is unknown
+}
+
+PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
+{
+  impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
+  // create() returns 0 when its allocation fails; that case is reported as out-of-memory
+  if (!qimpl) {
+#ifdef PUGIXML_NO_EXCEPTIONS
+    _result.error = "Out of memory";
+#else
+    throw std::bad_alloc();
+#endif
+  } else {
+    using impl::auto_deleter; // MSVC7 workaround
+    auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
+    // NOTE(review): the local `impl` presumably destroys qimpl on scope exit unless release() is called - confirm in auto_deleter
+    qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
+    // a null root means the query did not parse: _impl stays 0 and _result is left as the parser wrote it
+    if (qimpl->root) {
+      qimpl->root->optimize(&qimpl->alloc);
+      // success: keep the implementation and mark the result as error-free
+      _impl = impl.release();
+      _result.error = 0;
+    }
+  }
+}
+
+PUGI__FN xpath_query::xpath_query(): _impl(0)
+{
+} // no compiled query; _result keeps its default-constructed ("Internal error") state
+
+PUGI__FN xpath_query::~xpath_query()
+{
+  // _impl is 0 for default-constructed, failed and moved-from queries
+  if (_impl) impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
+}
+
+#if __cplusplus >= 201103
+PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
+{
+  _impl = rhs._impl; // take ownership of the compiled query
+  _result = rhs._result;
+  rhs._impl = 0; // rhs' destructor must not destroy what we now own
+  rhs._result = xpath_parse_result(); // rhs goes back to the default ("Internal error") result
+}
+
+PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
+{
+  if (this != &rhs) {
+    // release the query we currently own before adopting rhs' state
+    if (_impl) impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
+
+    _impl = rhs._impl;
+    rhs._impl = 0;
+    _result = rhs._result;
+    rhs._result = xpath_parse_result(); // rhs goes back to the default ("Internal error") result
+  }
+
+  return *this;
+}
+#endif
+
+PUGI__FN xpath_value_type xpath_query::return_type() const
+{
+  // a query without an implementation (default-constructed, failed or moved-from)
+  // reports xpath_type_none; otherwise the type comes from the AST root
+  return _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->rettype() : xpath_type_none;
+}
+
+PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
+{
+  if (!_impl) return false; // no compiled query: evaluates to false
+
+  impl::xpath_context c(n, 1, 1);
+  impl::xpath_stack_data sd;
+  // exception-free builds: a non-zero setjmp result is presumably the evaluation-error path; it also yields false
+#ifdef PUGIXML_NO_EXCEPTIONS
+  if (setjmp(sd.error_handler)) return false;
+#endif
+
+  return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
+}
+
+PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
+{
+  if (!_impl) return impl::gen_nan(); // no compiled query: evaluates to the gen_nan() value
+
+  impl::xpath_context c(n, 1, 1);
+  impl::xpath_stack_data sd;
+  // exception-free builds: a non-zero setjmp result is presumably the evaluation-error path; it also yields gen_nan()
+#ifdef PUGIXML_NO_EXCEPTIONS
+  if (setjmp(sd.error_handler)) return impl::gen_nan();
+#endif
+
+  return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
+}
+
+#ifndef PUGIXML_NO_STL
+PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
+{
+  impl::xpath_stack_data sd;
+  // a null _impl is handled inside evaluate_string_impl, which then returns a default-constructed string
+  impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+  return string_t(r.c_str(), r.length());
+}
+#endif
+
+PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
+{
+  impl::xpath_stack_data sd;
+  impl::xpath_string text = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
+
+  // size of the complete result including its terminator; this is what the
+  // caller gets back even when the buffer cannot hold all of it
+  size_t required = text.length() + 1;
+
+  if (capacity == 0) return required;
+
+  // copy as much as fits, always leaving room for the terminator
+  size_t written = (required < capacity) ? required : capacity;
+  assert(written > 0);
+  memcpy(buffer, text.c_str(), (written - 1) * sizeof(char_t));
+  buffer[written - 1] = 0;
+  return required;
+}
+
+PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
+{
+  impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+  if (!root) return xpath_node_set(); // no query, or (exception-free builds) a query that does not yield a node set
+
+  impl::xpath_context c(n, 1, 1);
+  impl::xpath_stack_data sd;
+  // exception-free builds: a non-zero setjmp result is presumably the evaluation-error path; it yields an empty set
+#ifdef PUGIXML_NO_EXCEPTIONS
+  if (setjmp(sd.error_handler)) return xpath_node_set();
+#endif
+  // nodeset_eval_all requests the complete result; it is copied into an owning xpath_node_set below
+  impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
+
+  return xpath_node_set(r.begin(), r.end(), r.type());
+}
+
+PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
+{
+  impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
+  if (!root) return xpath_node(); // no query, or (exception-free builds) a query that does not yield a node set
+
+  impl::xpath_context c(n, 1, 1);
+  impl::xpath_stack_data sd;
+  // exception-free builds: a non-zero setjmp result is presumably the evaluation-error path; it yields an empty node
+#ifdef PUGIXML_NO_EXCEPTIONS
+  if (setjmp(sd.error_handler)) return xpath_node();
+#endif
+  // nodeset_eval_first is passed instead of nodeset_eval_all because only r.first() is returned
+  impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
+
+  return r.first();
+}
+
+PUGI__FN const xpath_parse_result& xpath_query::result() const
+{
+  return _result; // parse outcome; its error member is 0 after a successful compilation
+}
+
+PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
+{
+} // intentionally empty: its address serves as the "true" value of xpath_query's bool-like conversion
+
+PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
+{
+  return _impl ? unspecified_bool_xpath_query : 0; // non-null pointer exactly when a compiled query is held
+}
+
+PUGI__FN bool xpath_query::operator!() const
+{
+  return !_impl; // true when no compiled query is held
+}
+
+PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query q(query, variables); // compiled for this one call only
+  return select_node(q);
+}
+
+PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
+{
+  return query.evaluate_node(*this); // this node is the evaluation context
+}
+
+PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query q(query, variables); // compiled for this one call only
+  return select_nodes(q);
+}
+
+PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
+{
+  return query.evaluate_node_set(*this); // this node is the evaluation context
+}
+
+PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
+{
+  xpath_query q(query, variables); // compiled for this one call only
+  return select_single_node(q);
+}
+
+PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
+{
+  return query.evaluate_node(*this); // same call as select_node(const xpath_query&)
+}
+}
+
+#endif
+
+#ifdef __BORLANDC__
+#	pragma option pop
+#endif
+
+// Intel C++ does not properly keep warning state for function templates,
+// so popping warning state at the end of translation unit leads to warnings in the middle.
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#	pragma warning(pop)
+#endif
+
+// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
+#undef PUGI__NO_INLINE
+#undef PUGI__UNLIKELY
+#undef PUGI__STATIC_ASSERT
+#undef PUGI__DMC_VOLATILE
+#undef PUGI__MSVC_CRT_VERSION
+#undef PUGI__NS_BEGIN
+#undef PUGI__NS_END
+#undef PUGI__FN
+#undef PUGI__FN_NO_INLINE
+#undef PUGI__GETPAGE_IMPL
+#undef PUGI__GETPAGE
+#undef PUGI__NODETYPE
+#undef PUGI__IS_CHARTYPE_IMPL
+#undef PUGI__IS_CHARTYPE
+#undef PUGI__IS_CHARTYPEX
+#undef PUGI__ENDSWITH
+#undef PUGI__SKIPWS
+#undef PUGI__OPTSET
+#undef PUGI__PUSHNODE
+#undef PUGI__POPNODE
+#undef PUGI__SCANFOR
+#undef PUGI__SCANWHILE
+#undef PUGI__SCANWHILE_UNROLL
+#undef PUGI__ENDSEG
+#undef PUGI__THROW_ERROR
+#undef PUGI__CHECK_ERROR
+
+#endif
+
+/**
+ * Copyright (c) 2006-2015 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/mosesdecoder/moses2/pugixml.hpp b/mosesdecoder/moses2/pugixml.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..13bf7917b727865b40dfeb4bb547e9add706000b
--- /dev/null
+++ b/mosesdecoder/moses2/pugixml.hpp
@@ -0,0 +1,1391 @@
+/**
+ * pugixml parser - version 1.7
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef PUGIXML_VERSION
+// Define version macro; evaluates to major * 100 + minor * 10 (version 1.7 -> 170) so that it's safe to use in less-than comparisons
+#	define PUGIXML_VERSION 170
+#endif
+
+// Include user configuration file (this can define various configuration macros)
+#include "pugiconfig.hpp"
+
+#ifndef HEADER_PUGIXML_HPP
+#define HEADER_PUGIXML_HPP
+
+// Include stddef.h for size_t and ptrdiff_t
+#include <stddef.h>
+
+// Include exception header for XPath
+#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
+#	include <exception>
+#endif
+
+// Include STL headers
+#ifndef PUGIXML_NO_STL
+#	include <iterator>
+#	include <iosfwd>
+#	include <string>
+#endif
+
+// Macro for deprecated features: expands to the compiler's deprecation attribute when __GNUC__ or _MSC_VER >= 1300 is defined, and to nothing otherwise
+#ifndef PUGIXML_DEPRECATED
+#	if defined(__GNUC__)
+#		define PUGIXML_DEPRECATED __attribute__((deprecated))
+#	elif defined(_MSC_VER) && _MSC_VER >= 1300
+#		define PUGIXML_DEPRECATED __declspec(deprecated)
+#	else
+#		define PUGIXML_DEPRECATED
+#	endif
+#endif
+
+// If no API is defined, assume default (an empty macro, i.e. no extra decoration on declarations)
+#ifndef PUGIXML_API
+#	define PUGIXML_API
+#endif
+
+// If no API for classes is defined, assume default (same as PUGIXML_API)
+#ifndef PUGIXML_CLASS
+#	define PUGIXML_CLASS PUGIXML_API
+#endif
+
+// If no API for functions is defined, assume default (same as PUGIXML_API)
+#ifndef PUGIXML_FUNCTION
+#	define PUGIXML_FUNCTION PUGIXML_API
+#endif
+
+// If the platform is known to have long long support (__cplusplus >= 201103, or _MSC_VER >= 1400), enable long long functions
+#ifndef PUGIXML_HAS_LONG_LONG
+#	if __cplusplus >= 201103
+#		define PUGIXML_HAS_LONG_LONG
+#	elif defined(_MSC_VER) && _MSC_VER >= 1400
+#		define PUGIXML_HAS_LONG_LONG
+#	endif
+#endif
+
+// Character interface macros: PUGIXML_TEXT(t) yields a literal of the active character type (L-prefixed in PUGIXML_WCHAR_MODE); PUGIXML_CHAR is that character type
+#ifdef PUGIXML_WCHAR_MODE
+#	define PUGIXML_TEXT(t) L ## t
+#	define PUGIXML_CHAR wchar_t
+#else
+#	define PUGIXML_TEXT(t) t
+#	define PUGIXML_CHAR char
+#endif
+
+namespace pugi
+{
+// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE (wchar_t when defined, char otherwise)
+typedef PUGIXML_CHAR char_t;
+
+#ifndef PUGIXML_NO_STL
+// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE. Not declared at all when PUGIXML_NO_STL is defined.
+typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
+#endif
+}
+
+// The PugiXML namespace
+namespace pugi
+{
+// Tree node types (the value type returned by xml_node::type())
+enum xml_node_type {
+  node_null,			// Empty (null) node handle
+  node_document,		// A document tree's absolute root
+  node_element,		// Element tag, i.e. '<node/>'
+  node_pcdata,		// Plain character data, i.e. 'text'
+  node_cdata,			// Character data, i.e. '<![CDATA[text]]>'
+  node_comment,		// Comment tag, i.e. '<!-- text -->'
+  node_pi,			// Processing instruction, i.e. '<?name?>'
+  node_declaration,	// Document declaration, i.e. '<?xml version="1.0"?>'
+  node_doctype		// Document type declaration, i.e. '<!DOCTYPE doc>'
+};
+
+// Parsing options: each flag is a distinct bit, so flags are combined with bitwise OR (see parse_default / parse_full below) and passed as an 'options' argument
+
+// Minimal parsing mode (equivalent to turning all other flags off).
+// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
+const unsigned int parse_minimal = 0x0000;
+
+// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_pi = 0x0001;
+
+// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_comments = 0x0002;
+
+// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
+const unsigned int parse_cdata = 0x0004;
+
+// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
+// This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata = 0x0008;
+
+// This flag determines if character and entity references are expanded during parsing. This flag is on by default.
+const unsigned int parse_escapes = 0x0010;
+
+// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
+const unsigned int parse_eol = 0x0020;
+
+// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
+const unsigned int parse_wconv_attribute = 0x0040;
+
+// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
+const unsigned int parse_wnorm_attribute = 0x0080;
+
+// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_declaration = 0x0100;
+
+// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_doctype = 0x0200;
+
+// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+// of whitespace is added to the DOM tree.
+// This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata_single = 0x0400;
+
+// This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default.
+const unsigned int parse_trim_pcdata = 0x0800;
+
+// This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document
+// is a valid document. This flag is off by default.
+const unsigned int parse_fragment = 0x1000;
+
+// The default parsing mode (the default value of the 'options' parameters in this header).
+// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
+
+// The full parsing mode: parse_default plus processing instructions, comments, declaration and doctype nodes.
+// Nodes of all types are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
+
+// These values select the encoding of input data for an XML document (they are enumerators, not combinable bit flags)
+enum xml_encoding {
+  encoding_auto,		// Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
+  encoding_utf8,		// UTF8 encoding
+  encoding_utf16_le,	// Little-endian UTF16
+  encoding_utf16_be,	// Big-endian UTF16
+  encoding_utf16,		// UTF16 with native endianness
+  encoding_utf32_le,	// Little-endian UTF32
+  encoding_utf32_be,	// Big-endian UTF32
+  encoding_utf32,		// UTF32 with native endianness
+  encoding_wchar,		// The same encoding wchar_t has (either UTF16 or UTF32)
+  encoding_latin1		// Latin-1 encoding (presumably ISO-8859-1; confirm against the implementation)
+};
+
+// Formatting flags: each flag is a distinct bit, so flags are combined with bitwise OR and passed as a 'flags' argument (see xml_node::print)
+
+// Indent each node written to the output stream with one indentation string per level of its depth in the DOM tree. This flag is on by default.
+const unsigned int format_indent = 0x01;
+
+// Write encoding-specific BOM to the output stream. This flag is off by default.
+const unsigned int format_write_bom = 0x02;
+
+// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
+const unsigned int format_raw = 0x04;
+
+// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
+const unsigned int format_no_declaration = 0x08;
+
+// Don't escape attribute values and PCDATA contents. This flag is off by default.
+const unsigned int format_no_escapes = 0x10;
+
+// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
+const unsigned int format_save_file_text = 0x20;
+
+// Write every attribute on a new line with appropriate indentation. This flag is off by default.
+const unsigned int format_indent_attributes = 0x40;
+
+// The default set of formatting flags (only format_indent is set).
+// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
+const unsigned int format_default = format_indent;
+
+// Forward declarations (the *_struct types stay incomplete in this part of the header; the handle classes below only hold pointers to them)
+struct xml_attribute_struct;
+struct xml_node_struct;
+
+class xml_node_iterator;
+class xml_attribute_iterator;
+class xml_named_node_iterator;
+
+class xml_tree_walker;
+
+struct xml_parse_result;
+
+class xml_node;
+
+class xml_text;
+
+#ifndef PUGIXML_NO_XPATH
+class xpath_node;
+class xpath_node_set;
+class xpath_query;
+class xpath_variable_set;
+#endif
+
+// Range-based for loop support: a plain [begin, end) pair of iterators of type It (the return type of xml_node::children()/attributes())
+template <typename It> class xml_object_range
+{
+public:
+  typedef It const_iterator;
+  typedef It iterator;
+
+  xml_object_range(It b, It e): _begin(b), _end(e) {
+  }
+
+  It begin() const { // returns a copy of the stored start iterator
+    return _begin;
+  }
+  It end() const { // returns a copy of the stored past-the-end iterator
+    return _end;
+  }
+
+private:
+  It _begin, _end;
+};
+
+// Writer interface for node printing (see xml_node::print); abstract, since write() is pure virtual
+class PUGIXML_CLASS xml_writer
+{
+public:
+  virtual ~xml_writer() {}
+
+  // Write memory chunk ('data' pointer plus 'size', presumably a byte count) into stream/file/whatever; subclasses must implement this
+  virtual void write(const void* data, size_t size) = 0;
+};
+
+// xml_writer implementation for FILE*
+class PUGIXML_CLASS xml_writer_file: public xml_writer
+{
+public:
+  // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
+  xml_writer_file(void* file);
+
+  virtual void write(const void* data, size_t size); // overrides xml_writer::write
+
+private:
+  void* file; // presumably the FILE* given to the constructor, kept as void* (definition not visible here)
+};
+
+#ifndef PUGIXML_NO_STL
+// xml_writer implementation for streams (narrow or wide std::basic_ostream); only declared when PUGIXML_NO_STL is not defined
+class PUGIXML_CLASS xml_writer_stream: public xml_writer
+{
+public:
+  // Construct writer from an output stream object
+  xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
+  xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
+
+  virtual void write(const void* data, size_t size); // overrides xml_writer::write
+
+private:
+  std::basic_ostream<char, std::char_traits<char> >* narrow_stream; // presumably set by the char-stream constructor - TODO confirm in the .cpp
+  std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream; // presumably set by the wchar_t-stream constructor - TODO confirm in the .cpp
+};
+#endif
+
+// A light-weight handle for manipulating attributes in DOM tree; it holds only a raw pointer and declares no destructor, so copying a handle copies the pointer
+class PUGIXML_CLASS xml_attribute
+{
+  friend class xml_attribute_iterator;
+  friend class xml_node;
+
+private:
+  xml_attribute_struct* _attr;
+
+  typedef void (*unspecified_bool_type)(xml_attribute***); // result type of the safe bool conversion operator below
+
+public:
+  // Default constructor. Constructs an empty attribute.
+  xml_attribute();
+
+  // Constructs attribute from internal pointer
+  explicit xml_attribute(xml_attribute_struct* attr);
+
+  // Safe bool conversion operator (converts to a function pointer type rather than to bool)
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators (compares wrapped attribute pointers)
+  bool operator==(const xml_attribute& r) const;
+  bool operator!=(const xml_attribute& r) const;
+  bool operator<(const xml_attribute& r) const;
+  bool operator>(const xml_attribute& r) const;
+  bool operator<=(const xml_attribute& r) const;
+  bool operator>=(const xml_attribute& r) const;
+
+  // Check if attribute is empty
+  bool empty() const;
+
+  // Get attribute name/value, or "" if attribute is empty
+  const char_t* name() const;
+  const char_t* value() const;
+
+  // Get attribute value, or the default value if attribute is empty
+  const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+  // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
+  int as_int(int def = 0) const;
+  unsigned int as_uint(unsigned int def = 0) const;
+  double as_double(double def = 0) const;
+  float as_float(float def = 0) const;
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  long long as_llong(long long def = 0) const;
+  unsigned long long as_ullong(unsigned long long def = 0) const;
+#endif
+
+  // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
+  bool as_bool(bool def = false) const;
+
+  // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
+  bool set_name(const char_t* rhs);
+  bool set_value(const char_t* rhs);
+
+  // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+  bool set_value(int rhs);
+  bool set_value(unsigned int rhs);
+  bool set_value(double rhs);
+  bool set_value(float rhs);
+  bool set_value(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  bool set_value(long long rhs);
+  bool set_value(unsigned long long rhs);
+#endif
+
+  // Set attribute value (equivalent to set_value without error checking); one overload per set_value overload above
+  xml_attribute& operator=(const char_t* rhs);
+  xml_attribute& operator=(int rhs);
+  xml_attribute& operator=(unsigned int rhs);
+  xml_attribute& operator=(double rhs);
+  xml_attribute& operator=(float rhs);
+  xml_attribute& operator=(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  xml_attribute& operator=(long long rhs);
+  xml_attribute& operator=(unsigned long long rhs);
+#endif
+
+  // Get next/previous attribute in the attribute list of the parent node
+  xml_attribute next_attribute() const;
+  xml_attribute previous_attribute() const;
+
+  // Get hash value (unique for handles to the same object)
+  size_t hash_value() const;
+
+  // Get internal pointer
+  xml_attribute_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround: explicit logical operators for an xml_attribute/bool operand pair (only declared when __BORLANDC__ is defined)
+bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
+#endif
+
+// A light-weight handle for manipulating nodes in DOM tree; it holds only a raw pointer and declares no destructor, so copying a handle copies the pointer
+class PUGIXML_CLASS xml_node
+{
+  friend class xml_attribute_iterator;
+  friend class xml_node_iterator;
+  friend class xml_named_node_iterator;
+
+protected:
+  xml_node_struct* _root; // protected (not private) so that derived classes such as xml_document can access it
+
+  typedef void (*unspecified_bool_type)(xml_node***); // result type of the safe bool conversion operator below
+
+public:
+  // Default constructor. Constructs an empty node.
+  xml_node();
+
+  // Constructs node from internal pointer
+  explicit xml_node(xml_node_struct* p);
+
+  // Safe bool conversion operator (converts to a function pointer type rather than to bool)
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators (compares wrapped node pointers)
+  bool operator==(const xml_node& r) const;
+  bool operator!=(const xml_node& r) const;
+  bool operator<(const xml_node& r) const;
+  bool operator>(const xml_node& r) const;
+  bool operator<=(const xml_node& r) const;
+  bool operator>=(const xml_node& r) const;
+
+  // Check if node is empty.
+  bool empty() const;
+
+  // Get node type
+  xml_node_type type() const;
+
+  // Get node name, or "" if node is empty or it has no name
+  const char_t* name() const;
+
+  // Get node value, or "" if node is empty or it has no value
+  // Note: For <node>text</node> node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes.
+  const char_t* value() const;
+
+  // Get attribute list
+  xml_attribute first_attribute() const;
+  xml_attribute last_attribute() const;
+
+  // Get children list
+  xml_node first_child() const;
+  xml_node last_child() const;
+
+  // Get next/previous sibling in the children list of the parent node
+  xml_node next_sibling() const;
+  xml_node previous_sibling() const;
+
+  // Get parent node
+  xml_node parent() const;
+
+  // Get root of DOM tree this node belongs to
+  xml_node root() const;
+
+  // Get text object for the current node
+  xml_text text() const;
+
+  // Get child, attribute or next/previous sibling with the specified name
+  xml_node child(const char_t* name) const;
+  xml_attribute attribute(const char_t* name) const;
+  xml_node next_sibling(const char_t* name) const;
+  xml_node previous_sibling(const char_t* name) const;
+
+  // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast)
+  xml_attribute attribute(const char_t* name, xml_attribute& hint) const;
+
+  // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
+  const char_t* child_value() const;
+
+  // Get child value of child with specified name. Equivalent to child(name).child_value().
+  const char_t* child_value(const char_t* name) const;
+
+  // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
+  bool set_name(const char_t* rhs);
+  bool set_value(const char_t* rhs);
+
+  // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
+  xml_attribute append_attribute(const char_t* name);
+  xml_attribute prepend_attribute(const char_t* name);
+  xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
+  xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
+
+  // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
+  xml_attribute append_copy(const xml_attribute& proto);
+  xml_attribute prepend_copy(const xml_attribute& proto);
+  xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
+  xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
+
+  // Add child node with specified type. Returns added node, or empty node on errors.
+  xml_node append_child(xml_node_type type = node_element);
+  xml_node prepend_child(xml_node_type type = node_element);
+  xml_node insert_child_after(xml_node_type type, const xml_node& node);
+  xml_node insert_child_before(xml_node_type type, const xml_node& node);
+
+  // Add child element with specified name. Returns added node, or empty node on errors.
+  xml_node append_child(const char_t* name);
+  xml_node prepend_child(const char_t* name);
+  xml_node insert_child_after(const char_t* name, const xml_node& node);
+  xml_node insert_child_before(const char_t* name, const xml_node& node);
+
+  // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
+  xml_node append_copy(const xml_node& proto);
+  xml_node prepend_copy(const xml_node& proto);
+  xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
+  xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
+
+  // Move the specified node to become a child of this node. Returns moved node, or empty node on errors.
+  xml_node append_move(const xml_node& moved);
+  xml_node prepend_move(const xml_node& moved);
+  xml_node insert_move_after(const xml_node& moved, const xml_node& node);
+  xml_node insert_move_before(const xml_node& moved, const xml_node& node);
+
+  // Remove specified attribute
+  bool remove_attribute(const xml_attribute& a);
+  bool remove_attribute(const char_t* name);
+
+  // Remove specified child
+  bool remove_child(const xml_node& n);
+  bool remove_child(const char_t* name);
+
+  // Parses buffer as an XML document fragment and appends all nodes as children of the current node.
+  // Copies/converts the buffer, so it may be deleted or changed after the function returns.
+  // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory.
+  xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Find attribute using predicate. Returns first attribute for which predicate returned true, or an empty attribute if there is none (or this node is empty).
+  template <typename Predicate> xml_attribute find_attribute(Predicate pred) const {
+    if (!_root) return xml_attribute();
+
+    for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
+      if (pred(attrib))
+        return attrib;
+
+    return xml_attribute();
+  }
+
+  // Find child node using predicate. Returns first child for which predicate returned true, or an empty node if there is none (or this node is empty).
+  template <typename Predicate> xml_node find_child(Predicate pred) const {
+    if (!_root) return xml_node();
+
+    for (xml_node node = first_child(); node; node = node.next_sibling())
+      if (pred(node))
+        return node;
+
+    return xml_node();
+  }
+
+  // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true; this node itself is not tested.
+  template <typename Predicate> xml_node find_node(Predicate pred) const {
+    if (!_root) return xml_node();
+
+    xml_node cur = first_child();
+
+    while (cur._root && cur._root != _root) { // stop when there are no more nodes or the walk has climbed back to this node
+      if (pred(cur)) return cur;
+
+      if (cur.first_child()) cur = cur.first_child(); // descend before visiting siblings
+      else if (cur.next_sibling()) cur = cur.next_sibling();
+      else {
+        while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); // climb until an ancestor has a next sibling, or we are back at this node
+
+        if (cur._root != _root) cur = cur.next_sibling(); // never step to a sibling of this node: that would leave the subtree
+      }
+    }
+
+    return xml_node();
+  }
+
+  // Find child node by attribute name/value
+  xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
+  xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
+
+#ifndef PUGIXML_NO_STL
+  // Get the absolute node path from root as a text string.
+  string_t path(char_t delimiter = '/') const;
+#endif
+
+  // Search for a node by path consisting of node names and . or .. elements.
+  xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
+
+  // Recursively traverse subtree with xml_tree_walker
+  bool traverse(xml_tree_walker& walker);
+
+#ifndef PUGIXML_NO_XPATH
+  // Select single node by evaluating XPath query. Returns first node from the resulting node set.
+  xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const;
+  xpath_node select_node(const xpath_query& query) const;
+
+  // Select node set by evaluating XPath query
+  xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
+  xpath_node_set select_nodes(const xpath_query& query) const;
+
+  // (deprecated: use select_node instead) Select single node by evaluating XPath query.
+  xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+  xpath_node select_single_node(const xpath_query& query) const;
+
+#endif
+
+  // Print subtree using a writer object
+  void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+
+#ifndef PUGIXML_NO_STL
+  // Print subtree to stream (note: the wchar_t stream overload takes no encoding parameter)
+  void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+  void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
+#endif
+
+  // Child nodes iterators
+  typedef xml_node_iterator iterator;
+
+  iterator begin() const;
+  iterator end() const;
+
+  // Attribute iterators
+  typedef xml_attribute_iterator attribute_iterator;
+
+  attribute_iterator attributes_begin() const;
+  attribute_iterator attributes_end() const;
+
+  // Range-based for support
+  xml_object_range<xml_node_iterator> children() const;
+  xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
+  xml_object_range<xml_attribute_iterator> attributes() const;
+
+  // Get node offset in parsed file/string (in char_t units) for debugging purposes
+  ptrdiff_t offset_debug() const;
+
+  // Get hash value (unique for handles to the same object)
+  size_t hash_value() const;
+
+  // Get internal pointer
+  xml_node_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround: explicit logical operators for an xml_node/bool operand pair (only declared when __BORLANDC__ is defined)
+bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
+#endif
+
+// A helper for working with text inside PCDATA nodes; the node-pointer constructor is private, so non-empty objects come from the friend class xml_node (see xml_node::text)
+class PUGIXML_CLASS xml_text
+{
+  friend class xml_node;
+
+  xml_node_struct* _root;
+
+  typedef void (*unspecified_bool_type)(xml_text***); // result type of the safe bool conversion operator below
+
+  explicit xml_text(xml_node_struct* root);
+
+  xml_node_struct* _data_new(); // non-const counterpart of _data(); presumably creates the data node when missing - TODO confirm in the .cpp
+  xml_node_struct* _data() const;
+
+public:
+  // Default constructor. Constructs an empty object.
+  xml_text();
+
+  // Safe bool conversion operator (converts to a function pointer type rather than to bool)
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Check if text object is empty
+  bool empty() const;
+
+  // Get text, or "" if object is empty
+  const char_t* get() const;
+
+  // Get text, or the default value if object is empty
+  const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+  // Get text as a number, or the default value if conversion did not succeed or object is empty
+  int as_int(int def = 0) const;
+  unsigned int as_uint(unsigned int def = 0) const;
+  double as_double(double def = 0) const;
+  float as_float(float def = 0) const;
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  long long as_llong(long long def = 0) const;
+  unsigned long long as_ullong(unsigned long long def = 0) const;
+#endif
+
+  // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
+  bool as_bool(bool def = false) const;
+
+  // Set text (returns false if object is empty or there is not enough memory)
+  bool set(const char_t* rhs);
+
+  // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+  bool set(int rhs);
+  bool set(unsigned int rhs);
+  bool set(double rhs);
+  bool set(float rhs);
+  bool set(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  bool set(long long rhs);
+  bool set(unsigned long long rhs);
+#endif
+
+  // Set text (equivalent to set without error checking); one overload per set overload above
+  xml_text& operator=(const char_t* rhs);
+  xml_text& operator=(int rhs);
+  xml_text& operator=(unsigned int rhs);
+  xml_text& operator=(double rhs);
+  xml_text& operator=(float rhs);
+  xml_text& operator=(bool rhs);
+
+#ifdef PUGIXML_HAS_LONG_LONG
+  xml_text& operator=(long long rhs);
+  xml_text& operator=(unsigned long long rhs);
+#endif
+
+  // Get the data node (node_pcdata or node_cdata) for this object
+  xml_node data() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround: explicit logical operators for an xml_text/bool operand pair (only declared when __BORLANDC__ is defined)
+bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
+#endif
+
+// Child node iterator (a bidirectional iterator over a collection of xml_node)
+class PUGIXML_CLASS xml_node_iterator
+{
+  friend class xml_node;
+
+private:
+  mutable xml_node _wrap; // mutable so that the const operator*/operator-> below can return non-const access to it
+  xml_node _parent;
+
+  xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); // private: reachable only from the friend class xml_node
+
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_node value_type;
+  typedef xml_node* pointer;
+  typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+  // Default constructor
+  xml_node_iterator();
+
+  // Construct an iterator which points to the specified node
+  xml_node_iterator(const xml_node& node);
+
+  // Iterator operators
+  bool operator==(const xml_node_iterator& rhs) const;
+  bool operator!=(const xml_node_iterator& rhs) const;
+
+  xml_node& operator*() const;
+  xml_node* operator->() const;
+
+  const xml_node_iterator& operator++(); // pre-increment
+  xml_node_iterator operator++(int); // post-increment (returns an iterator by value)
+
+  const xml_node_iterator& operator--(); // pre-decrement
+  xml_node_iterator operator--(int); // post-decrement (returns an iterator by value)
+};
+
+// Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
+class PUGIXML_CLASS xml_attribute_iterator
+{
+  friend class xml_node;
+
+private:
+  mutable xml_attribute _wrap; // mutable so that the const operator*/operator-> below can return non-const access to it
+  xml_node _parent;
+
+  xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); // private: reachable only from the friend class xml_node
+
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_attribute value_type;
+  typedef xml_attribute* pointer;
+  typedef xml_attribute& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+  // Default constructor
+  xml_attribute_iterator();
+
+  // Construct an iterator which points to the specified attribute (unlike xml_node_iterator, the parent node must be passed explicitly)
+  xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
+
+  // Iterator operators
+  bool operator==(const xml_attribute_iterator& rhs) const;
+  bool operator!=(const xml_attribute_iterator& rhs) const;
+
+  xml_attribute& operator*() const;
+  xml_attribute* operator->() const;
+
+  const xml_attribute_iterator& operator++(); // pre-increment
+  xml_attribute_iterator operator++(int); // post-increment (returns an iterator by value)
+
+  const xml_attribute_iterator& operator--(); // pre-decrement
+  xml_attribute_iterator operator--(int); // post-decrement (returns an iterator by value)
+};
+
+// Named node range helper: the iterator type of the range returned by xml_node::children(const char_t* name)
+class PUGIXML_CLASS xml_named_node_iterator
+{
+  friend class xml_node;
+
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_node value_type;
+  typedef xml_node* pointer;
+  typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+  // Default constructor
+  xml_named_node_iterator();
+
+  // Construct an iterator which points to the specified node
+  xml_named_node_iterator(const xml_node& node, const char_t* name);
+
+  // Iterator operators
+  bool operator==(const xml_named_node_iterator& rhs) const;
+  bool operator!=(const xml_named_node_iterator& rhs) const;
+
+  xml_node& operator*() const;
+  xml_node* operator->() const;
+
+  const xml_named_node_iterator& operator++(); // pre-increment
+  xml_named_node_iterator operator++(int); // post-increment (returns an iterator by value)
+
+  const xml_named_node_iterator& operator--(); // pre-decrement
+  xml_named_node_iterator operator--(int); // post-decrement (returns an iterator by value)
+
+private:
+  mutable xml_node _wrap; // mutable so that the const operator*/operator-> above can return non-const access to it
+  xml_node _parent;
+  const char_t* _name; // held as a raw pointer; presumably the name string must outlive the iterator - TODO confirm
+
+  xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name);
+};
+
+// Abstract tree walker class (see xml_node::traverse); abstract because for_each() is pure virtual
+class PUGIXML_CLASS xml_tree_walker
+{
+  friend class xml_node;
+
+private:
+  int _depth; // private, but accessible to the friend class xml_node; exposed read-only to subclasses via depth()
+
+protected:
+  // Get current traversal depth
+  int depth() const;
+
+public:
+  xml_tree_walker();
+  virtual ~xml_tree_walker();
+
+  // Callback that is called when traversal begins (virtual but not pure, so overriding is optional)
+  virtual bool begin(xml_node& node);
+
+  // Callback that is called for each node traversed (pure virtual: subclasses must implement it)
+  virtual bool for_each(xml_node& node) = 0;
+
+  // Callback that is called when traversal ends (virtual but not pure, so overriding is optional)
+  virtual bool end(xml_node& node);
+};
+
+// Parsing status, returned as part of xml_parse_result object (status_ok is explicitly 0; the remaining enumerators describe error conditions)
+enum xml_parse_status {
+  status_ok = 0,				// No error
+
+  status_file_not_found,		// File was not found during load_file()
+  status_io_error,			// Error reading from file/stream
+  status_out_of_memory,		// Could not allocate memory
+  status_internal_error,		// Internal error occurred
+
+  status_unrecognized_tag,	// Parser could not determine tag type
+
+  status_bad_pi,				// Parsing error occurred while parsing document declaration/processing instruction
+  status_bad_comment,			// Parsing error occurred while parsing comment
+  status_bad_cdata,			// Parsing error occurred while parsing CDATA section
+  status_bad_doctype,			// Parsing error occurred while parsing document type declaration
+  status_bad_pcdata,			// Parsing error occurred while parsing PCDATA section
+  status_bad_start_element,	// Parsing error occurred while parsing start element tag
+  status_bad_attribute,		// Parsing error occurred while parsing element attribute
+  status_bad_end_element,		// Parsing error occurred while parsing end element tag
+  status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
+
+  status_append_invalid_root,	// Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer)
+
+  status_no_document_element	// Parsing resulted in a document without element nodes
+};
+
+// Parsing result
+struct PUGIXML_CLASS xml_parse_result {
+  // Parsing status (see xml_parse_status)
+  xml_parse_status status;
+
+  // Last parsed offset (in char_t units from start of input data)
+  ptrdiff_t offset;
+
+  // Source document encoding
+  xml_encoding encoding;
+
+  // Default constructor, initializes object to failed state
+  xml_parse_result();
+
+  // Cast to bool operator, allows `if (result)` checks. NOTE(review): presumably true only for status_ok - confirm in pugixml.cpp
+  operator bool() const;
+
+  // Get error description (returned as plain char string, not char_t)
+  const char* description() const;
+};
+
+// Document class (DOM tree root)
+class PUGIXML_CLASS xml_document: public xml_node
+{
+private:
+  char_t* _buffer; // NOTE(review): presumably the parse buffer the document frees itself (cf. load_buffer_inplace_own) - confirm
+
+  char _memory[192]; // NOTE(review): fixed-size storage embedded in the object; its use is defined in pugixml.cpp - confirm
+
+  // Non-copyable semantics: copy operations are private; use reset(const xml_document&) to copy contents
+  xml_document(const xml_document&);
+  xml_document& operator=(const xml_document&);
+
+  void create();
+  void destroy();
+
+public:
+  // Default constructor, makes empty document
+  xml_document();
+
+  // Destructor, invalidates all node/attribute handles to this document
+  ~xml_document();
+
+  // Removes all nodes, leaving the empty document
+  void reset();
+
+  // Removes all nodes, then copies the entire contents of the specified document
+  void reset(const xml_document& proto);
+
+#ifndef PUGIXML_NO_STL
+  // Load document from stream. Only the char overload accepts an explicit encoding.
+  xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+  xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
+#endif
+
+  // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
+  xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+
+  // Load document from zero-terminated string. No encoding conversions are applied.
+  xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
+
+  // Load document from file
+  xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+  xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
+  xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+  // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
+  xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+  // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
+  xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details).
+  void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+#ifndef PUGIXML_NO_STL
+  // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details).
+  void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+  void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
+#endif
+
+  // Save XML to file
+  bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+  bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+  // Get document element
+  xml_node document_element() const;
+};
+
+#ifndef PUGIXML_NO_XPATH
+// XPath query return type
+enum xpath_value_type { // Also the type tag of XPath variables (see xpath_variable::type)
+  xpath_type_none,	  // Unknown type (query failed to compile)
+  xpath_type_node_set,  // Node set (xpath_node_set)
+  xpath_type_number,	  // Number
+  xpath_type_string,	  // String
+  xpath_type_boolean	  // Boolean
+};
+
+// XPath parsing result
+struct PUGIXML_CLASS xpath_parse_result {
+  // Error message (0 if no error)
+  const char* error;
+
+  // Last parsed offset (in char_t units from string start)
+  ptrdiff_t offset;
+
+  // Default constructor, initializes object to failed state
+  xpath_parse_result();
+
+  // Cast to bool operator, allows `if (result)` checks. NOTE(review): presumably true when error is 0 - confirm in pugixml.cpp
+  operator bool() const;
+
+  // Get error description (returned as plain char string, not char_t)
+  const char* description() const;
+};
+
+// A single XPath variable
+class PUGIXML_CLASS xpath_variable
+{
+  friend class xpath_variable_set; // the set has access to the protected constructor and members below
+
+protected:
+  xpath_value_type _type; // type tag passed to the constructor
+  xpath_variable* _next; // NOTE(review): presumably links variables that share a slot in xpath_variable_set - confirm
+
+  xpath_variable(xpath_value_type type); // protected: client code obtains variables via xpath_variable_set::add
+
+  // Non-copyable semantics (copy operations are not publicly accessible)
+  xpath_variable(const xpath_variable&);
+  xpath_variable& operator=(const xpath_variable&);
+
+public:
+  // Get variable name
+  const char_t* name() const;
+
+  // Get variable type
+  xpath_value_type type() const;
+
+  // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
+  bool get_boolean() const;
+  double get_number() const;
+  const char_t* get_string() const;
+  const xpath_node_set& get_node_set() const;
+
+  // Set variable value; no type conversion is performed, false is returned on type mismatch error
+  bool set(bool value);
+  bool set(double value);
+  bool set(const char_t* value);
+  bool set(const xpath_node_set& value);
+};
+
+// A set of XPath variables
+class PUGIXML_CLASS xpath_variable_set
+{
+private:
+  xpath_variable* _data[64]; // NOTE(review): presumably 64 hash buckets of variables chained through xpath_variable::_next - confirm
+
+  void _assign(const xpath_variable_set& rhs);
+  void _swap(xpath_variable_set& rhs);
+
+  xpath_variable* _find(const char_t* name) const;
+
+  static bool _clone(xpath_variable* var, xpath_variable** out_result);
+  static void _destroy(xpath_variable* var);
+
+public:
+  // Default constructor/destructor
+  xpath_variable_set();
+  ~xpath_variable_set();
+
+  // Copy constructor/assignment operator
+  xpath_variable_set(const xpath_variable_set& rhs);
+  xpath_variable_set& operator=(const xpath_variable_set& rhs);
+
+#if __cplusplus >= 201103
+  // Move semantics support (declared only when compiling as C++11 or later)
+  xpath_variable_set(xpath_variable_set&& rhs);
+  xpath_variable_set& operator=(xpath_variable_set&& rhs);
+#endif
+
+  // Add a new variable or get the existing one, if the types match
+  xpath_variable* add(const char_t* name, xpath_value_type type);
+
+  // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
+  bool set(const char_t* name, bool value);
+  bool set(const char_t* name, double value);
+  bool set(const char_t* name, const char_t* value);
+  bool set(const char_t* name, const xpath_node_set& value);
+
+  // Get existing variable by name (const and non-const access)
+  xpath_variable* get(const char_t* name);
+  const xpath_variable* get(const char_t* name) const;
+};
+
+// A compiled XPath query object
+class PUGIXML_CLASS xpath_query
+{
+private:
+  void* _impl; // opaque pointer: the implementation type is not visible in this header
+  xpath_parse_result _result;
+
+  typedef void (*unspecified_bool_type)(xpath_query***);
+
+  // Non-copyable semantics (copy operations are private)
+  xpath_query(const xpath_query&);
+  xpath_query& operator=(const xpath_query&);
+
+public:
+  // Construct a compiled object from XPath expression.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
+  explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
+
+  // Default constructor
+  xpath_query();
+
+  // Destructor
+  ~xpath_query();
+
+#if __cplusplus >= 201103
+  // Move semantics support (declared only when compiling as C++11 or later)
+  xpath_query(xpath_query&& rhs);
+  xpath_query& operator=(xpath_query&& rhs);
+#endif
+
+  // Get query expression return type
+  xpath_value_type return_type() const;
+
+  // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  bool evaluate_boolean(const xpath_node& n) const;
+
+  // Evaluate expression as double value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  double evaluate_number(const xpath_node& n) const;
+
+#ifndef PUGIXML_NO_STL
+  // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  string_t evaluate_string(const xpath_node& n) const;
+#endif
+
+  // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+  // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  // If PUGIXML_NO_EXCEPTIONS is defined, an empty result is produced instead. NOTE(review): this comment used to say "empty  set" - confirm exact behavior in pugixml.cpp
+  size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
+
+  // Evaluate expression as node set in the specified context.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+  // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
+  xpath_node_set evaluate_node_set(const xpath_node& n) const;
+
+  // Evaluate expression as node set in the specified context.
+  // Return first node in document order, or empty node if node set is empty.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+  // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead.
+  xpath_node evaluate_node(const xpath_node& n) const;
+
+  // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
+  const xpath_parse_result& result() const;
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+};
+
+#ifndef PUGIXML_NO_EXCEPTIONS
+// XPath exception class
+class PUGIXML_CLASS xpath_exception: public std::exception
+{
+private:
+  xpath_parse_result _result; // parse result this exception was constructed from
+
+public:
+  // Construct exception from parse result
+  explicit xpath_exception(const xpath_parse_result& result);
+
+  // Get error message (overrides std::exception::what; declared non-throwing)
+  virtual const char* what() const throw();
+
+  // Get parse result
+  const xpath_parse_result& result() const;
+};
+#endif
+
+// XPath node class (either xml_node or xml_attribute)
+class PUGIXML_CLASS xpath_node
+{
+private:
+  xml_node _node;
+  xml_attribute _attribute;
+
+  typedef void (*unspecified_bool_type)(xpath_node***); // result type of the safe bool conversion operator below
+
+public:
+  // Default constructor; constructs empty XPath node
+  xpath_node();
+
+  // Construct XPath node from XML node/attribute (an attribute is given together with its parent node)
+  xpath_node(const xml_node& node);
+  xpath_node(const xml_attribute& attribute, const xml_node& parent);
+
+  // Get node/attribute, if any
+  xml_node node() const;
+  xml_attribute attribute() const;
+
+  // Get parent of contained node/attribute
+  xml_node parent() const;
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators
+  bool operator==(const xpath_node& n) const;
+  bool operator!=(const xpath_node& n) const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
+#endif
+
+// A fixed-size collection of XPath nodes
+class PUGIXML_CLASS xpath_node_set
+{
+public:
+  // Collection type
+  enum type_t {
+    type_unsorted,			// Not ordered
+    type_sorted,			// Sorted by document order (ascending)
+    type_sorted_reverse		// Sorted by document order (descending)
+  };
+
+  // Constant iterator type
+  typedef const xpath_node* const_iterator;
+
+  // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work
+  typedef const xpath_node* iterator;
+
+  // Default constructor. Constructs empty set.
+  xpath_node_set();
+
+  // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
+  xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
+
+  // Destructor
+  ~xpath_node_set();
+
+  // Copy constructor/assignment operator
+  xpath_node_set(const xpath_node_set& ns);
+  xpath_node_set& operator=(const xpath_node_set& ns);
+
+#if __cplusplus >= 201103
+  // Move semantics support (declared only when compiling as C++11 or later)
+  xpath_node_set(xpath_node_set&& rhs);
+  xpath_node_set& operator=(xpath_node_set&& rhs);
+#endif
+
+  // Get collection type
+  type_t type() const;
+
+  // Get collection size
+  size_t size() const;
+
+  // Indexing operator
+  const xpath_node& operator[](size_t index) const;
+
+  // Collection iterators
+  const_iterator begin() const;
+  const_iterator end() const;
+
+  // Sort the collection in ascending/descending order by document order
+  void sort(bool reverse = false);
+
+  // Get first node in the collection by document order
+  xpath_node first() const;
+
+  // Check if collection is empty
+  bool empty() const;
+
+private:
+  type_t _type;
+
+  xpath_node _storage; // NOTE(review): presumably inline storage used for single-node sets - confirm in pugixml.cpp
+
+  xpath_node* _begin; // NOTE(review): _begin/_end presumably delimit the stored nodes as [begin, end) - confirm
+  xpath_node* _end;
+
+  void _assign(const_iterator begin, const_iterator end, type_t type);
+  void _move(xpath_node_set& rhs);
+};
+#endif
+
+#ifndef PUGIXML_NO_STL
+// Convert wide string to UTF8
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
+
+// Convert UTF8 to wide string
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
+#endif
+
+// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
+typedef void* (*allocation_function)(size_t size);
+
+// Memory deallocation function interface
+typedef void (*deallocation_function)(void* ptr);
+
+// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
+void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
+
+// Get current memory management functions
+allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
+deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#endif
+
+// Make sure implementation is included in header-only mode
+// Use macro expansion in #include to work around QMake (QTBUG-11923)
+#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE)
+#	define PUGIXML_SOURCE "pugixml.cpp"
+#	include PUGIXML_SOURCE
+#endif
+
+/**
+ * Copyright (c) 2006-2015 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentence.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentence.h
new file mode 100644
index 0000000000000000000000000000000000000000..30c3d06a26d2b98d5f5e0cc1798b754f91afe392
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentence.h
@@ -0,0 +1,54 @@
+/*
+ * AlignedSentence.h
+ *
+ *  Created on: 18 Feb 2014
+ *      Author: s0565741
+ */
+#pragma once
+
+#include <string>
+#include <set>
+#include "ConsistentPhrases.h"
+#include "Phrase.h"
+#include "moses/TypeDef.h"
+
+class Parameter;
+
+class AlignedSentence // One sentence pair: source phrase, target phrase and the consistent phrases between them
+{
+public:
+  AlignedSentence(int lineNum)
+    :m_lineNum(lineNum)
+  {}
+  // ^ stores only the line number; the phrases stay empty until a later step fills them
+  AlignedSentence(int lineNum,
+                  const std::string &source,
+                  const std::string &target,
+                  const std::string &alignment);
+  virtual ~AlignedSentence();
+  virtual void Create(const Parameter &params); // virtual: AlignedSentenceSyntax provides its own Create()
+  // Moses::Input selects the source phrase; any other direction selects the target phrase
+  const Phrase &GetPhrase(Moses::FactorDirection direction) const {
+    return (direction == Moses::Input) ? m_source : m_target;
+  }
+
+  const ConsistentPhrases &GetConsistentPhrases() const {
+    return m_consistentPhrases;
+  }
+
+  virtual std::string Debug() const;
+
+  int m_lineNum; // public data member; set by the one-argument constructor above
+protected:
+  Phrase m_source, m_target;
+  ConsistentPhrases m_consistentPhrases;
+
+  void CreateConsistentPhrases(const Parameter &params);
+  void PopulateWordVec(Phrase &vec, const std::string &line);
+
+  // m_source and m_target MUST be populated before calling this
+  void PopulateAlignment(const std::string &line);
+  std::vector<int> GetSourceAlignmentCount() const;
+};
+
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp b/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb088f5b4dd4d8823a297be7b208c1d0a6a7eebc
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/AlignedSentenceSyntax.cpp
@@ -0,0 +1,182 @@
+/*
+ * AlignedSentenceSyntax.cpp
+ *
+ *  Created on: 26 Feb 2014
+ *      Author: hieu
+ */
+
+#include "AlignedSentenceSyntax.h"
+#include "Parameter.h"
+#include "pugixml.hpp"
+#include "moses/Util.h"
+
+using namespace std;
+
+AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
+    const std::string &source,
+    const std::string &target,
+    const std::string &alignment)
+  :AlignedSentence(lineNum) // base-class constructor that only records the line number
+  ,m_sourceStr(source)      // the three raw strings are copied and kept unparsed ...
+  ,m_targetStr(target)
+  ,m_alignmentStr(alignment)
+{ // ... nothing is parsed here; Create() consumes the stored strings
+}
+
+AlignedSentenceSyntax::~AlignedSentenceSyntax()
+{
+  // Empty on purpose. NOTE(review): Word objects allocated in XMLParse are not freed here - confirm their owner
+}
+
+// Fills `phrase` and `tree` from one side of the sentence pair. A non-syntax side is
+// handed to PopulateWordVec; a syntax side is wrapped in a synthetic
+// <xml><tree label="X"> root and parsed as XML.
+void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
+                                     string line, Phrase &phrase, SyntaxTree &tree)
+{
+  if (!isSyntax) {
+    PopulateWordVec(phrase, line);
+    tree.SetHieroLabel(params.hieroNonTerm);
+    return;
+  }
+
+  const string wrapped = "<xml><tree label=\"X\">" + line + "</tree></xml>";
+  XMLParse(phrase, tree, wrapped, params);
+  if (mixedSyntaxType == 0) {
+    return; // not mixed syntax: the tree keeps only what the parse produced
+  }
+  // mixed syntax. Always add [X] where there isn't 1; type 2 additionally calls AddToAll
+  tree.SetHieroLabel(params.hieroNonTerm);
+  if (mixedSyntaxType == 2) tree.AddToAll(params.hieroNonTerm);
+}
+
+void AlignedSentenceSyntax::Create(const Parameter &params)
+{ // Build order: phrases and trees for both sides, then alignment, consistent phrases, non-terminals
+  Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
+           m_source, m_sourceTree);
+  Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
+           m_target, m_targetTree);
+  // m_source and m_target are filled above, as PopulateAlignment requires (see AlignedSentence.h)
+  PopulateAlignment(m_alignmentStr);
+  CreateConsistentPhrases(params);
+
+  // create labels: attach source/target label pairs to the consistent phrases
+  CreateNonTerms();
+}
+
+// Replaces & | < > ' " [ ] in `text` with the corresponding XML entities, in place.
+void Escape(string &text)
+{
+  // "&" must be handled first: every replacement below introduces new '&' characters
+  static const char *const kEntities[][2] = {
+    {"&", "&amp;"}, {"|", "&#124;"}, {"<", "&lt;"}, {">", "&gt;"},
+    {"'", "&apos;"}, {"\"", "&quot;"}, {"[", "&#91;"}, {"]", "&#93;"}
+  };
+  for (size_t i = 0; i < sizeof(kEntities) / sizeof(kEntities[0]); ++i) {
+    text = Moses::Replace(text, kEntities[i][0], kEntities[i][1]);
+  }
+}
+
+void AlignedSentenceSyntax::XMLParse(Phrase &output,
+                                     SyntaxTree &tree,
+                                     const pugi::xml_node &parentNode,
+                                     const Parameter &params)
+{
+  // Walks the children of parentNode in document order. The text of each child is
+  // escaped, tokenised and appended to `output`; a child with a non-empty "label"
+  // attribute also adds the span "[label]" to `tree`, covering the words appended
+  // while that child (including its descendants) was processed.
+  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
+    const string nodeName = childNode.name();
+
+    // span label, and the index the first word of this child will get
+    string label;
+    const int startPos = static_cast<int>(output.size());
+
+    // only children that have a name are searched for a label and for nested children
+    if (!nodeName.empty()) {
+      pugi::xml_attribute attribute = childNode.attribute("label");
+      label = attribute.as_string();
+
+      // recursively call this function. For proper recursive trees
+      XMLParse(output, tree, childNode, params);
+    }
+
+    // fill phrase vector
+    string text = childNode.value();
+    Escape(text);
+
+    std::vector<string> toks;
+    Moses::Tokenize(toks, text);
+
+    // NOTE(review): the Word objects are allocated with new and stored as raw pointers;
+    // nothing in this function frees them - confirm who owns the contents of `output`.
+    for (size_t i = 0; i < toks.size(); ++i) {
+      // a word is constructed with its own position in the phrase and its token string
+      output.push_back(new Word(output.size(), toks[i]));
+    }
+
+    // index of the last word appended so far;
+    // equals startPos - 1 if neither this child nor its descendants added a word
+    const int endPos = static_cast<int>(output.size()) - 1;
+
+    // fill syntax labels
+    if (!label.empty()) {
+      label = "[" + label + "]";
+      tree.Add(startPos, endPos, label, params);
+    }
+  }
+}
+
+void AlignedSentenceSyntax::XMLParse(Phrase &output,
+                                     SyntaxTree &tree,
+                                     const std::string input,
+                                     const Parameter &params)
+{
+  // Parses `input` as XML and walks everything below its <xml> root. load_string() replaces the
+  // deprecated load(); a failed parse is reported on stderr instead of being silently ignored.
+  pugi::xml_document doc;
+  pugi::xml_parse_result result = doc.load_string(input.c_str(), pugi::parse_default | pugi::parse_comments);
+  if (!result) cerr << "WARNING: line " << m_lineNum << ": XML parse error: " << result.description() << endl;
+  XMLParse(output, tree, doc.child("xml"), params);
+}
+
+void AlignedSentenceSyntax::CreateNonTerms()
+{
+  // For every source span [spanStart, spanEnd], combine the source-tree labels of the span
+  // with the target-tree labels of each consistent phrase collected for that span.
+  const int numWords = static_cast<int>(m_source.size());
+  for (int spanStart = 0; spanStart < numWords; ++spanStart) {
+    for (int spanEnd = spanStart; spanEnd < numWords; ++spanEnd) {
+      ConsistentPhrases::Coll &phrases = m_consistentPhrases.GetColl(spanStart, spanEnd);
+      const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(spanStart, spanEnd);
+
+      typedef ConsistentPhrases::Coll::iterator PhraseIter;
+      for (PhraseIter it = phrases.begin(); it != phrases.end(); ++it) {
+        ConsistentPhrase &phrase = **it;
+
+        // corners[2] and corners[3] are passed as the start and end of the target span
+        const SyntaxTree::Labels &targetLabels = m_targetTree.Find(phrase.corners[2], phrase.corners[3]);
+        CreateNonTerms(phrase, sourceLabels, targetLabels);
+      }
+    }
+  }
+}
+
+void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
+    const SyntaxTree::Labels &sourceLabels,
+    const SyntaxTree::Labels &targetLabels)
+{
+  // Register every (source label, target label) combination on the phrase.
+  typedef SyntaxTree::Labels::const_iterator LabelIter;
+  const LabelIter srcEnd = sourceLabels.end();
+  const LabelIter tgtEnd = targetLabels.end();
+
+  for (LabelIter src = sourceLabels.begin(); src != srcEnd; ++src) {
+    for (LabelIter tgt = targetLabels.begin(); tgt != tgtEnd; ++tgt) {
+      cp.AddNonTerms(*src, *tgt);
+    }
+  }
+}
+
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
new file mode 100644
index 0000000000000000000000000000000000000000..1347b46659a42e4a446fe75e9e2233c23819a4eb
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/ConsistentPhrases.h
@@ -0,0 +1,41 @@
+/*
+ * ConsistentPhrases.h
+ *
+ *  Created on: 20 Feb 2014
+ *      Author: hieu
+ */
+#pragma once
+
+#include <set>
+#include <vector>
+#include <iostream>
+#include "ConsistentPhrase.h"
+
+class Word;
+class Parameter;
+
+class ConsistentPhrases // Container of ConsistentPhrase pointers, looked up by source span
+{
+public:
+  typedef std::set<ConsistentPhrase*> Coll; // set of pointers: ordered and de-duplicated by address
+
+  ConsistentPhrases();
+  virtual ~ConsistentPhrases();
+  // NOTE(review): presumably sizes m_coll for a sentence of `size` words - confirm in ConsistentPhrases.cpp
+  void Initialize(size_t size);
+
+  void Add(int sourceStart, int sourceEnd,
+           int targetStart, int targetEnd,
+           const Parameter &params);
+
+  void AddHieroNonTerms(const Parameter &params);
+  // Collection for one source span (const and mutable access)
+  const Coll &GetColl(int sourceStart, int sourceEnd) const;
+  Coll &GetColl(int sourceStart, int sourceEnd);
+
+  std::string Debug() const;
+
+protected:
+  std::vector< std::vector<Coll> > m_coll; // two-level table of collections; NOTE(review): index scheme is defined in the .cpp - confirm
+};
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/InputFileStream.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/InputFileStream.h
new file mode 100644
index 0000000000000000000000000000000000000000..5de41623787bf0a8237f61f438b484f26f6290b1
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/InputFileStream.h
@@ -0,0 +1,48 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#ifndef moses_InputFileStream_h
+#define moses_InputFileStream_h
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+namespace Moses
+{
+
+/** Used in place of std::istream; can also read zipped files whose path ends in .gz
+ */
+class InputFileStream : public std::istream
+{
+protected:
+  std::streambuf *m_streambuf; // NOTE(review): presumably the buffer opened for filePath and released by Close() - confirm in the .cpp
+public:
+  // explicit: a std::string path is never converted to a stream implicitly
+  explicit InputFileStream(const std::string &filePath);
+  ~InputFileStream();
+
+  void Close();
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/Main.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/Main.h
new file mode 100644
index 0000000000000000000000000000000000000000..9744ba389b1b36dd7afab06c4b36379215db6780
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/Main.h
@@ -0,0 +1,12 @@
+/*
+ * Main.h
+ *
+ *  Created on: 28 Feb 2014
+ *      Author: hieu
+ */
+#pragma once
+
+#include "OutputFileStream.h"
+
+void CreateGlueGrammar(Moses::OutputFileStream &glueFile);
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.cpp b/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..456be8932d320e2914e61ea2d0ecac2ca2946c98
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.cpp
@@ -0,0 +1,69 @@
+/*
+ * NonTerm.cpp
+ *
+ *  Created on: 22 Feb 2014
+ *      Author: hieu
+ */
+
+#include <sstream>
+#include "NonTerm.h"
+#include "Word.h"
+#include "ConsistentPhrase.h"
+#include "Parameter.h"
+
+using namespace std;
+
+NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
+                 const std::string &source,
+                 const std::string &target)
+  :m_consistentPhrase(&consistentPhrase)
+  ,m_source(source)
+  ,m_target(target)
+{
+  // Only the address of consistentPhrase is stored, so that object must outlive this NonTerm;
+  // the two label strings are copied.
+}
+
+NonTerm::~NonTerm()
+{
+  // Empty on purpose: m_consistentPhrase is not deleted here.
+}
+
+std::string NonTerm::Debug() const
+{
+  // Both labels first, immediately followed by the debug text of the referenced phrase.
+  ostringstream buf;
+  buf << m_source << m_target << m_consistentPhrase->Debug();
+  return buf.str();
+}
+
+void NonTerm::Output(std::ostream &out) const
+{
+  out << m_source << m_target; // source label immediately followed by target label, no separator
+}
+
+void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
+{
+  out << GetLabel(direction); // writes only the label GetLabel selects for this direction
+}
+
+const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
+{
+  return (direction == Moses::Input) ? m_source : m_target; // any direction other than Input yields the target label
+}
+
+bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
+{
+  // hiero on one side: that side's label equals the configured hiero non-terminal
+  return GetLabel(direction) == params.hieroNonTerm;
+}
+
+bool NonTerm::IsHiero(const Parameter &params) const
+{
+  return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params); // true only when both sides are hiero
+}
+
+int NonTerm::GetWidth(Moses::FactorDirection direction) const
+{
+  return m_consistentPhrase->GetWidth(direction); // delegates to the referenced consistent phrase
+}
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.h
new file mode 100644
index 0000000000000000000000000000000000000000..7019787310f0263e3f45a453d2e0ec873f7d5bad
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/NonTerm.h
@@ -0,0 +1,50 @@
+/*
+ * NonTerm.h
+ *
+ *  Created on: 22 Feb 2014
+ *      Author: hieu
+ */
+#pragma once
+#include <string>
+#include "RuleSymbol.h"
+#include "moses/TypeDef.h"
+
+class ConsistentPhrase;
+class Parameter;
+
+class NonTerm : public RuleSymbol
+{
+public:
+  // Keeps the address of consistentPhrase (no copy) together with both labels.
+  NonTerm(const ConsistentPhrase &consistentPhrase,
+          const std::string &source,
+          const std::string &target);
+  virtual ~NonTerm();
+
+  // The phrase this non-terminal was constructed from.
+  const ConsistentPhrase &GetConsistentPhrase() const { return *m_consistentPhrase; }
+
+  // Forwards to ConsistentPhrase::GetWidth for the given side.
+  int GetWidth(Moses::FactorDirection direction) const;
+
+  // Always true for this class.
+  virtual bool IsNonTerm() const { return true; }
+
+  // Source label concatenated with target label, no separator.
+  std::string GetString() const { return m_source + m_target; }
+
+  virtual std::string Debug() const;
+  virtual void Output(std::ostream &out) const;
+  void Output(std::ostream &out, Moses::FactorDirection direction) const;
+
+  // Moses::Input selects the source label, any other direction the target label.
+  const std::string &GetLabel(Moses::FactorDirection direction) const;
+  bool IsHiero(Moses::FactorDirection direction, const Parameter &params) const;
+  bool IsHiero(const Parameter &params) const;
+
+protected:
+  // Pointer to the originating phrase; this class's destructor does not delete it.
+  const ConsistentPhrase *m_consistentPhrase;
+  std::string m_source, m_target;
+};
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/Phrase.h b/mosesdecoder/phrase-extract/extract-mixed-syntax/Phrase.h
new file mode 100644
index 0000000000000000000000000000000000000000..57cfaf9c26789f52fbea5201bf4ac61158941e36
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/Phrase.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <vector>
+#include "Word.h"
+
+// A phrase: a vector of terminals, held as raw Word pointers.
+// No destructor is declared, so destroying a Phrase does not delete the Words.
+class Phrase : public std::vector<Word*>
+{
+public:
+  // Creates an empty phrase.
+  Phrase() : std::vector<Word*>() {}
+
+  // Creates a phrase of `size` slots, each initialised to a null pointer.
+  Phrase(size_t size) : std::vector<Word*>(size) {}
+
+  // Debug rendering; implemented outside this header.
+  std::string Debug() const;
+};
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/Word.cpp b/mosesdecoder/phrase-extract/extract-mixed-syntax/Word.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f36391f1aa74bfa452ce0d4f38b07a867007d67d
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/Word.cpp
@@ -0,0 +1,69 @@
+/*
+ * Word.cpp
+ *
+ *  Created on: 18 Feb 2014
+ *      Author: s0565741
+ */
+#include <limits>
+#include "Word.h"
+#include "moses/Util.h"
+
+using namespace std;
+
+// Records the word's position index and its string; the alignment set is
+// default-constructed and therefore starts out empty.
+Word::Word(int pos, const std::string &str)
+  : m_pos(pos),
+    m_str(str)
+{
+}
+
+// Nothing to release: aligned words are only referenced, never deleted here.
+Word::~Word()
+{
+}
+
+void Word::AddAlignment(const Word *other)
+{
+  m_alignment.insert(other);  // std::set of pointers: inserting an already-present pointer is a no-op
+}
+
+// Collects GetPos() of every word aligned to this one.
+// The result is a std::set, so equal positions collapse and iterate in ascending order.
+std::set<int> Word::GetAlignmentIndex() const
+{
+  typedef std::set<const Word *>::const_iterator AlignIter;
+  std::set<int> positions;
+
+  for (AlignIter it = m_alignment.begin(), last = m_alignment.end(); it != last; ++it) {
+    positions.insert((*it)->GetPos());
+  }
+
+  return positions;
+}
+
+void Word::Output(std::ostream &out) const
+{
+  out << m_str;  // the stored string is written as-is, without splitting on '|'
+}
+
+std::string Word::Debug() const
+{
+  return m_str;  // debug form is a copy of the stored string; position and alignments are not included
+}
+
+int Word::CompareString(const Word &other) const
+{
+  return m_str.compare(other.m_str);  // std::string::compare result: <0, 0 or >0; only the strings are compared
+}
+
+std::string Word::GetString(int factor) const
+{
+  // m_str holds '|'-separated factors; return the one at index `factor`.
+  vector<string> toks;
+  Moses::Tokenize(toks, m_str, "|");
+  assert(factor >= 0 && static_cast<size_t>(factor) < toks.size());
+  return toks.at(static_cast<size_t>(factor));  // throws std::out_of_range even when NDEBUG strips the assert
+}
+
+
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/pugiconfig.hpp b/mosesdecoder/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5a63fd488ef411bbf72118254eeb749cb7bca0ef
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
@@ -0,0 +1,69 @@
+/**
+ * pugixml parser - version 1.2
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef HEADER_PUGICONFIG_HPP
+#define HEADER_PUGICONFIG_HPP
+
+// Uncomment this to enable wchar_t mode
+// #define PUGIXML_WCHAR_MODE
+
+// Uncomment this to disable XPath
+// #define PUGIXML_NO_XPATH
+
+// Uncomment this to disable STL
+// #define PUGIXML_NO_STL
+
+// Uncomment this to disable exceptions
+// #define PUGIXML_NO_EXCEPTIONS
+
+// Set this to control attributes for public classes/functions, i.e.:
+// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
+// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
+// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
+// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
+
+// Uncomment this to switch to header-only version
+// #define PUGIXML_HEADER_ONLY
+// #include "pugixml.cpp"
+
+// Tune these constants to adjust memory-related behavior
+// #define PUGIXML_MEMORY_PAGE_SIZE 32768
+// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
+// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
+
+#endif
+
+/**
+ * Copyright (c) 2006-2012 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/mosesdecoder/phrase-extract/extract-mixed-syntax/pugixml.hpp b/mosesdecoder/phrase-extract/extract-mixed-syntax/pugixml.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cbc527bef429f40438e774b32d8cc76c8e4c1d6c
--- /dev/null
+++ b/mosesdecoder/phrase-extract/extract-mixed-syntax/pugixml.hpp
@@ -0,0 +1,1256 @@
+/**
+ * pugixml parser - version 1.2
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef PUGIXML_VERSION
+// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
+#	define PUGIXML_VERSION 120
+#endif
+
+// Include user configuration file (this can define various configuration macros)
+#include "pugiconfig.hpp"
+
+#ifndef HEADER_PUGIXML_HPP
+#define HEADER_PUGIXML_HPP
+
+// Include <cstddef> for size_t and ptrdiff_t
+#include <cstddef>
+
+// Include exception header for XPath
+#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS)
+#	include <exception>
+#endif
+
+// Include STL headers
+#ifndef PUGIXML_NO_STL
+#	include <iterator>
+#	include <iosfwd>
+#	include <string>
+#endif
+
+// Macro for deprecated features
+#ifndef PUGIXML_DEPRECATED
+#	if defined(__GNUC__)
+#		define PUGIXML_DEPRECATED __attribute__((deprecated))
+#	elif defined(_MSC_VER) && _MSC_VER >= 1300
+#		define PUGIXML_DEPRECATED __declspec(deprecated)
+#	else
+#		define PUGIXML_DEPRECATED
+#	endif
+#endif
+
+// If no API is defined, assume default
+#ifndef PUGIXML_API
+#	define PUGIXML_API
+#endif
+
+// If no API for classes is defined, assume default
+#ifndef PUGIXML_CLASS
+#	define PUGIXML_CLASS PUGIXML_API
+#endif
+
+// If no API for functions is defined, assume default
+#ifndef PUGIXML_FUNCTION
+#	define PUGIXML_FUNCTION PUGIXML_API
+#endif
+
+// Character interface macros
+#ifdef PUGIXML_WCHAR_MODE
+#	define PUGIXML_TEXT(t) L ## t
+#	define PUGIXML_CHAR wchar_t
+#else
+#	define PUGIXML_TEXT(t) t
+#	define PUGIXML_CHAR char
+#endif
+
+namespace pugi
+{
+// Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE
+typedef PUGIXML_CHAR char_t;
+
+#ifndef PUGIXML_NO_STL
+// String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE
+typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t;
+#endif
+}
+
+// The PugiXML namespace
+namespace pugi
+{
+// Tree node types
+enum xml_node_type {
+  node_null,			// Empty (null) node handle
+  node_document,		// A document tree's absolute root
+  node_element,		// Element tag, i.e. '<node/>'
+  node_pcdata,		// Plain character data, i.e. 'text'
+  node_cdata,			// Character data, i.e. '<![CDATA[text]]>'
+  node_comment,		// Comment tag, i.e. '<!-- text -->'
+  node_pi,			// Processing instruction, i.e. '<?name?>'
+  node_declaration,	// Document declaration, i.e. '<?xml version="1.0"?>'
+  node_doctype		// Document type declaration, i.e. '<!DOCTYPE doc>'
+};
+
+// Parsing options
+
+// Minimal parsing mode (equivalent to turning all other flags off).
+// Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed.
+const unsigned int parse_minimal = 0x0000;
+
+// This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_pi = 0x0001;
+
+// This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default.
+const unsigned int parse_comments = 0x0002;
+
+// This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default.
+const unsigned int parse_cdata = 0x0004;
+
+// This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree.
+// This flag is off by default; turning it on usually results in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata = 0x0008;
+
+// This flag determines if character and entity references are expanded during parsing. This flag is on by default.
+const unsigned int parse_escapes = 0x0010;
+
+// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
+const unsigned int parse_eol = 0x0020;
+
+// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
+const unsigned int parse_wconv_attribute = 0x0040;
+
+// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
+const unsigned int parse_wnorm_attribute = 0x0080;
+
+// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_declaration = 0x0100;
+
+// This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default.
+const unsigned int parse_doctype = 0x0200;
+
+// This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only
+// of whitespace is added to the DOM tree.
+// This flag is off by default; turning it on may result in slower parsing and more memory consumption.
+const unsigned int parse_ws_pcdata_single = 0x0400;
+
+// The default parsing mode.
+// Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol;
+
+// The full parsing mode.
+// Nodes of all types are added to the DOM tree, character/reference entities are expanded,
+// End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules.
+const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype;
+
+// These flags determine the encoding of input data for XML document
+enum xml_encoding {
+  encoding_auto,		// Auto-detect input encoding using BOM or < / <? detection; use UTF8 if BOM is not found
+  encoding_utf8,		// UTF8 encoding
+  encoding_utf16_le,	// Little-endian UTF16
+  encoding_utf16_be,	// Big-endian UTF16
+  encoding_utf16,		// UTF16 with native endianness
+  encoding_utf32_le,	// Little-endian UTF32
+  encoding_utf32_be,	// Big-endian UTF32
+  encoding_utf32,		// UTF32 with native endianness
+  encoding_wchar,		// The same encoding wchar_t has (either UTF16 or UTF32)
+  encoding_latin1		// Latin-1 (ISO-8859-1) encoding
+};
+
+// Formatting flags
+
+// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
+const unsigned int format_indent = 0x01;
+
+// Write encoding-specific BOM to the output stream. This flag is off by default.
+const unsigned int format_write_bom = 0x02;
+
+// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
+const unsigned int format_raw = 0x04;
+
+// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
+const unsigned int format_no_declaration = 0x08;
+
+// Don't escape attribute values and PCDATA contents. This flag is off by default.
+const unsigned int format_no_escapes = 0x10;
+
+// Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default.
+const unsigned int format_save_file_text = 0x20;
+
+// The default set of formatting flags.
+// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
+const unsigned int format_default = format_indent;
+
+// Forward declarations
+struct xml_attribute_struct;
+struct xml_node_struct;
+
+class xml_node_iterator;
+class xml_attribute_iterator;
+class xml_named_node_iterator;
+
+class xml_tree_walker;
+
+class xml_node;
+
+class xml_text;
+
+#ifndef PUGIXML_NO_XPATH
+class xpath_node;
+class xpath_node_set;
+class xpath_query;
+class xpath_variable_set;
+#endif
+
+// Range-based for loop support: a lightweight pair of iterators exposing
+// begin()/end(), as returned by xml_node::children() and xml_node::attributes().
+template <typename It> class xml_object_range
+{
+public:
+  typedef It const_iterator;
+
+  // Stores copies of both iterators.
+  xml_object_range(It first, It last): _begin(first), _end(last) {}
+
+  // Iterator the range starts at.
+  It begin() const { return _begin; }
+
+  // Iterator the range stops at.
+  It end() const { return _end; }
+
+private:
+  It _begin, _end;
+};
+
+// Writer interface for node printing (see xml_node::print); abstract, implementations override write()
+class PUGIXML_CLASS xml_writer
+{
+public:
+  virtual ~xml_writer() {}  // virtual so an implementation can be destroyed through an xml_writer pointer
+
+  // Write memory chunk into stream/file/whatever (size is presumably a byte count for data)
+  virtual void write(const void* data, size_t size) = 0;
+};
+
+// xml_writer implementation for FILE*
+class PUGIXML_CLASS xml_writer_file: public xml_writer
+{
+public:
+  // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio
+  xml_writer_file(void* file);
+
+  virtual void write(const void* data, size_t size);  // overrides xml_writer::write
+
+private:
+  void* file;  // presumably the FILE* given to the constructor, kept type-erased; defined in pugixml.cpp
+};
+
+#ifndef PUGIXML_NO_STL
+// xml_writer implementation for streams (only available when PUGIXML_NO_STL is not defined)
+class PUGIXML_CLASS xml_writer_stream: public xml_writer
+{
+public:
+  // Construct writer from an output stream object (one overload for narrow, one for wide streams)
+  xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream);
+  xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream);
+
+  virtual void write(const void* data, size_t size);  // overrides xml_writer::write
+
+private:
+  std::basic_ostream<char, std::char_traits<char> >* narrow_stream;  // NOTE(review): presumably set by the char constructor only - confirm in pugixml.cpp
+  std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream;  // NOTE(review): presumably set by the wchar_t constructor only - confirm in pugixml.cpp
+};
+#endif
+
+// A light-weight handle for manipulating attributes in DOM tree
+class PUGIXML_CLASS xml_attribute
+{
+  friend class xml_attribute_iterator;
+  friend class xml_node;
+
+private:
+  xml_attribute_struct* _attr;
+
+  typedef void (*unspecified_bool_type)(xml_attribute***);
+
+public:
+  // Default constructor. Constructs an empty attribute.
+  xml_attribute();
+
+  // Constructs attribute from internal pointer
+  explicit xml_attribute(xml_attribute_struct* attr);
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators (compares wrapped attribute pointers)
+  bool operator==(const xml_attribute& r) const;
+  bool operator!=(const xml_attribute& r) const;
+  bool operator<(const xml_attribute& r) const;
+  bool operator>(const xml_attribute& r) const;
+  bool operator<=(const xml_attribute& r) const;
+  bool operator>=(const xml_attribute& r) const;
+
+  // Check if attribute is empty
+  bool empty() const;
+
+  // Get attribute name/value, or "" if attribute is empty
+  const char_t* name() const;
+  const char_t* value() const;
+
+  // Get attribute value, or the default value if attribute is empty
+  const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+  // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty
+  int as_int(int def = 0) const;
+  unsigned int as_uint(unsigned int def = 0) const;
+  double as_double(double def = 0) const;
+  float as_float(float def = 0) const;
+
+  // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty
+  bool as_bool(bool def = false) const;
+
+  // Set attribute name/value (returns false if attribute is empty or there is not enough memory)
+  bool set_name(const char_t* rhs);
+  bool set_value(const char_t* rhs);
+
+  // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+  bool set_value(int rhs);
+  bool set_value(unsigned int rhs);
+  bool set_value(double rhs);
+  bool set_value(bool rhs);
+
+  // Set attribute value (equivalent to set_value without error checking)
+  xml_attribute& operator=(const char_t* rhs);
+  xml_attribute& operator=(int rhs);
+  xml_attribute& operator=(unsigned int rhs);
+  xml_attribute& operator=(double rhs);
+  xml_attribute& operator=(bool rhs);
+
+  // Get next/previous attribute in the attribute list of the parent node
+  xml_attribute next_attribute() const;
+  xml_attribute previous_attribute() const;
+
+  // Get hash value (unique for handles to the same object)
+  size_t hash_value() const;
+
+  // Get internal pointer
+  xml_attribute_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs);
+#endif
+
+// A light-weight handle for manipulating nodes in DOM tree
+class PUGIXML_CLASS xml_node
+{
+  friend class xml_attribute_iterator;
+  friend class xml_node_iterator;
+  friend class xml_named_node_iterator;
+
+protected:
+  xml_node_struct* _root;
+
+  typedef void (*unspecified_bool_type)(xml_node***);
+
+public:
+  // Default constructor. Constructs an empty node.
+  xml_node();
+
+  // Constructs node from internal pointer
+  explicit xml_node(xml_node_struct* p);
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators (compares wrapped node pointers)
+  bool operator==(const xml_node& r) const;
+  bool operator!=(const xml_node& r) const;
+  bool operator<(const xml_node& r) const;
+  bool operator>(const xml_node& r) const;
+  bool operator<=(const xml_node& r) const;
+  bool operator>=(const xml_node& r) const;
+
+  // Check if node is empty.
+  bool empty() const;
+
+  // Get node type
+  xml_node_type type() const;
+
+  // Get node name/value, or "" if node is empty or it has no name/value
+  const char_t* name() const;
+  const char_t* value() const;
+
+  // Get attribute list
+  xml_attribute first_attribute() const;
+  xml_attribute last_attribute() const;
+
+  // Get children list
+  xml_node first_child() const;
+  xml_node last_child() const;
+
+  // Get next/previous sibling in the children list of the parent node
+  xml_node next_sibling() const;
+  xml_node previous_sibling() const;
+
+  // Get parent node
+  xml_node parent() const;
+
+  // Get root of DOM tree this node belongs to
+  xml_node root() const;
+
+  // Get text object for the current node
+  xml_text text() const;
+
+  // Get child, attribute or next/previous sibling with the specified name
+  xml_node child(const char_t* name) const;
+  xml_attribute attribute(const char_t* name) const;
+  xml_node next_sibling(const char_t* name) const;
+  xml_node previous_sibling(const char_t* name) const;
+
+  // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA
+  const char_t* child_value() const;
+
+  // Get child value of child with specified name. Equivalent to child(name).child_value().
+  const char_t* child_value(const char_t* name) const;
+
+  // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
+  bool set_name(const char_t* rhs);
+  bool set_value(const char_t* rhs);
+
+  // Add attribute with specified name. Returns added attribute, or empty attribute on errors.
+  xml_attribute append_attribute(const char_t* name);
+  xml_attribute prepend_attribute(const char_t* name);
+  xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr);
+  xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr);
+
+  // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors.
+  xml_attribute append_copy(const xml_attribute& proto);
+  xml_attribute prepend_copy(const xml_attribute& proto);
+  xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr);
+  xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr);
+
+  // Add child node with specified type. Returns added node, or empty node on errors.
+  xml_node append_child(xml_node_type type = node_element);
+  xml_node prepend_child(xml_node_type type = node_element);
+  xml_node insert_child_after(xml_node_type type, const xml_node& node);
+  xml_node insert_child_before(xml_node_type type, const xml_node& node);
+
+  // Add child element with specified name. Returns added node, or empty node on errors.
+  xml_node append_child(const char_t* name);
+  xml_node prepend_child(const char_t* name);
+  xml_node insert_child_after(const char_t* name, const xml_node& node);
+  xml_node insert_child_before(const char_t* name, const xml_node& node);
+
+  // Add a copy of the specified node as a child. Returns added node, or empty node on errors.
+  xml_node append_copy(const xml_node& proto);
+  xml_node prepend_copy(const xml_node& proto);
+  xml_node insert_copy_after(const xml_node& proto, const xml_node& node);
+  xml_node insert_copy_before(const xml_node& proto, const xml_node& node);
+
+  // Remove specified attribute
+  bool remove_attribute(const xml_attribute& a);
+  bool remove_attribute(const char_t* name);
+
+  // Remove specified child
+  bool remove_child(const xml_node& n);
+  bool remove_child(const char_t* name);
+
+  // Find attribute using predicate. Returns the first attribute accepted by pred, or an empty handle if none is (or this node is empty).
+  template <typename Predicate> xml_attribute find_attribute(Predicate pred) const {
+    xml_attribute candidate = _root ? first_attribute() : xml_attribute();
+
+    while (candidate) {
+      if (pred(candidate)) break;
+      candidate = candidate.next_attribute();
+    }
+    return candidate;
+  }
+
+  // Find child node using predicate. Returns the first child accepted by pred, or an empty handle if none is (or this node is empty).
+  template <typename Predicate> xml_node find_child(Predicate pred) const {
+    xml_node candidate = _root ? first_child() : xml_node();
+
+    while (candidate) {
+      if (pred(candidate)) break;
+      candidate = candidate.next_sibling();
+    }
+    return candidate;
+  }
+
+  // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true. This node itself is never passed to pred; returns an empty handle if nothing matches.
+  template <typename Predicate> xml_node find_node(Predicate pred) const {
+    if (!_root) return xml_node();
+
+    xml_node cur = first_child();
+    // Iterative walk (no recursion): ends when cur becomes empty or climbs back up to this node.
+    while (cur._root && cur._root != _root) {
+      if (pred(cur)) return cur;
+      // Order of preference: first child, then next sibling, otherwise climb until an ancestor has a sibling.
+      if (cur.first_child()) cur = cur.first_child();
+      else if (cur.next_sibling()) cur = cur.next_sibling();
+      else {
+        while (!cur.next_sibling() && cur._root != _root) cur = cur.parent();
+        // Only step sideways if the climb stopped below this node; this node's own siblings are outside the subtree.
+        if (cur._root != _root) cur = cur.next_sibling();
+      }
+    }
+
+    return xml_node();
+  }
+
+  // Find child node by attribute name/value
+  xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const;
+  xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const;
+
+#ifndef PUGIXML_NO_STL
+  // Get the absolute node path from root as a text string.
+  string_t path(char_t delimiter = '/') const;
+#endif
+
+  // Search for a node by path consisting of node names and . or .. elements.
+  xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const;
+
+  // Recursively traverse subtree with xml_tree_walker
+  bool traverse(xml_tree_walker& walker);
+
+#ifndef PUGIXML_NO_XPATH
+  // Select single node by evaluating XPath query. Returns first node from the resulting node set.
+  xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
+  xpath_node select_single_node(const xpath_query& query) const;
+
+  // Select node set by evaluating XPath query
+  xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
+  xpath_node_set select_nodes(const xpath_query& query) const;
+#endif
+
+  // Print subtree using a writer object
+  void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+
+#ifndef PUGIXML_NO_STL
+  // Print subtree to stream
+  void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
+  void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const;
+#endif
+
+  // Child nodes iterators
+  typedef xml_node_iterator iterator;
+
+  iterator begin() const;
+  iterator end() const;
+
+  // Attribute iterators
+  typedef xml_attribute_iterator attribute_iterator;
+
+  attribute_iterator attributes_begin() const;
+  attribute_iterator attributes_end() const;
+
+  // Range-based for support
+  xml_object_range<xml_node_iterator> children() const;
+  xml_object_range<xml_named_node_iterator> children(const char_t* name) const;
+  xml_object_range<xml_attribute_iterator> attributes() const;
+
+  // Get node offset in parsed file/string (in char_t units) for debugging purposes
+  ptrdiff_t offset_debug() const;
+
+  // Get hash value (unique for handles to the same object)
+  size_t hash_value() const;
+
+  // Get internal pointer
+  xml_node_struct* internal_object() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs);
+#endif
+
+// A helper for working with text inside PCDATA nodes
+class PUGIXML_CLASS xml_text
+{
+  friend class xml_node;
+
+  xml_node_struct* _root;
+
+  typedef void (*unspecified_bool_type)(xml_text***);
+
+  explicit xml_text(xml_node_struct* root);
+
+  xml_node_struct* _data_new();
+  xml_node_struct* _data() const;
+
+public:
+  // Default constructor. Constructs an empty object.
+  xml_text();
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Check if text object is empty
+  bool empty() const;
+
+  // Get text, or "" if object is empty
+  const char_t* get() const;
+
+  // Get text, or the default value if object is empty
+  const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const;
+
+  // Get text as a number, or the default value if conversion did not succeed or object is empty
+  int as_int(int def = 0) const;
+  unsigned int as_uint(unsigned int def = 0) const;
+  double as_double(double def = 0) const;
+  float as_float(float def = 0) const;
+
+  // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty
+  bool as_bool(bool def = false) const;
+
+  // Set text (returns false if object is empty or there is not enough memory)
+  bool set(const char_t* rhs);
+
+  // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false")
+  bool set(int rhs);
+  bool set(unsigned int rhs);
+  bool set(double rhs);
+  bool set(bool rhs);
+
+  // Set text (equivalent to set without error checking)
+  xml_text& operator=(const char_t* rhs);
+  xml_text& operator=(int rhs);
+  xml_text& operator=(unsigned int rhs);
+  xml_text& operator=(double rhs);
+  xml_text& operator=(bool rhs);
+
+  // Get the data node (node_pcdata or node_cdata) for this object
+  xml_node data() const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround
+bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs);
+#endif
+
+// Child node iterator (a bidirectional iterator over a collection of xml_node)
+class PUGIXML_CLASS xml_node_iterator
+{
+  friend class xml_node;
+
+private:
+  mutable xml_node _wrap;
+  xml_node _parent;
+
+  xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent);
+
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_node value_type;
+  typedef xml_node* pointer;
+  typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::bidirectional_iterator_tag iterator_category;
+#endif
+
+  // Default constructor
+  xml_node_iterator();
+
+  // Construct an iterator which points to the specified node
+  xml_node_iterator(const xml_node& node);
+
+  // Iterator operators
+  bool operator==(const xml_node_iterator& rhs) const;
+  bool operator!=(const xml_node_iterator& rhs) const;
+
+  xml_node& operator*() const;
+  xml_node* operator->() const;
+
+  const xml_node_iterator& operator++();
+  xml_node_iterator operator++(int);
+
+  const xml_node_iterator& operator--();
+  xml_node_iterator operator--(int);
+};
+
+// Attribute iterator (a bidirectional iterator over a collection of xml_attribute)
+class PUGIXML_CLASS xml_attribute_iterator
+{
+  friend class xml_node;
+
+private:
+  mutable xml_attribute _wrap; // mutable: const members may modify it; presumably the attribute exposed by operator*/operator-> (TODO confirm)
+  xml_node _parent; // presumably the node that owns the iterated attributes (TODO confirm)
+
+  xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); // private: reachable only from this class and its friend xml_node
+
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_attribute value_type;
+  typedef xml_attribute* pointer;
+  typedef xml_attribute& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::bidirectional_iterator_tag iterator_category; // omitted when PUGIXML_NO_STL is defined
+#endif
+
+  // Default constructor
+  xml_attribute_iterator();
+
+  // Construct an iterator which points to the specified attribute (unlike xml_node_iterator, the parent node is passed explicitly)
+  xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent);
+
+  // Iterator operators
+  bool operator==(const xml_attribute_iterator& rhs) const;
+  bool operator!=(const xml_attribute_iterator& rhs) const;
+
+  xml_attribute& operator*() const; // const member, yet grants non-const access to the attribute handle
+  xml_attribute* operator->() const;
+
+  const xml_attribute_iterator& operator++(); // prefix form: returns a const reference
+  xml_attribute_iterator operator++(int); // postfix form: returns an iterator by value
+
+  const xml_attribute_iterator& operator--(); // prefix form: returns a const reference
+  xml_attribute_iterator operator--(int); // postfix form: returns an iterator by value
+};
+
+// Named node range helper
+class xml_named_node_iterator
+{
+public:
+  // Iterator traits
+  typedef ptrdiff_t difference_type;
+  typedef xml_node value_type;
+  typedef xml_node* pointer;
+  typedef xml_node& reference;
+
+#ifndef PUGIXML_NO_STL
+  typedef std::forward_iterator_tag iterator_category; // forward only: this class declares no operator--
+#endif
+
+  // Default constructor
+  xml_named_node_iterator();
+
+  // Construct an iterator which points to the specified node
+  xml_named_node_iterator(const xml_node& node, const char_t* name);
+
+  // Iterator operators
+  bool operator==(const xml_named_node_iterator& rhs) const;
+  bool operator!=(const xml_named_node_iterator& rhs) const;
+
+  xml_node& operator*() const; // const member, yet grants non-const access to the node handle
+  xml_node* operator->() const;
+
+  const xml_named_node_iterator& operator++(); // prefix form: returns a const reference
+  xml_named_node_iterator operator++(int); // postfix form: returns an iterator by value
+
+private:
+  mutable xml_node _node; // mutable: const members may modify it; presumably the current node (TODO confirm)
+  const char_t* _name; // raw pointer, no copy is held here; presumably the name string must outlive the iterator (TODO confirm)
+};
+
+// Abstract tree walker class (see xml_node::traverse)
+class PUGIXML_CLASS xml_tree_walker
+{
+  friend class xml_node;
+
+private:
+  int _depth; // private; subclasses reach the depth only through depth(); presumably updated by the friend xml_node while traversing (TODO confirm)
+
+protected:
+  // Get current traversal depth
+  int depth() const;
+
+public:
+  xml_tree_walker();
+  virtual ~xml_tree_walker();
+
+  // Callback that is called when traversal begins
+  virtual bool begin(xml_node& node); // virtual but not pure: overriding is optional
+
+  // Callback that is called for each node traversed
+  virtual bool for_each(xml_node& node) = 0; // pure virtual: every concrete walker must implement it
+
+  // Callback that is called when traversal ends
+  virtual bool end(xml_node& node); // virtual but not pure: overriding is optional
+};
+
+// Parsing status, returned as part of xml_parse_result object (its status member); status_ok is the only enumerator with an explicit value (0)
+enum xml_parse_status {
+  status_ok = 0,				// No error
+
+  status_file_not_found,		// File was not found during load_file()
+  status_io_error,			// Error reading from file/stream
+  status_out_of_memory,		// Could not allocate memory
+  status_internal_error,		// Internal error occurred
+
+  status_unrecognized_tag,	// Parser could not determine tag type
+
+  status_bad_pi,				// Parsing error occurred while parsing document declaration/processing instruction
+  status_bad_comment,			// Parsing error occurred while parsing comment
+  status_bad_cdata,			// Parsing error occurred while parsing CDATA section
+  status_bad_doctype,			// Parsing error occurred while parsing document type declaration
+  status_bad_pcdata,			// Parsing error occurred while parsing PCDATA section
+  status_bad_start_element,	// Parsing error occurred while parsing start element tag
+  status_bad_attribute,		// Parsing error occurred while parsing element attribute
+  status_bad_end_element,		// Parsing error occurred while parsing end element tag
+  status_end_element_mismatch // There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag)
+};
+
+// Parsing result
+struct PUGIXML_CLASS xml_parse_result {
+  // Parsing status (see xml_parse_status)
+  xml_parse_status status;
+
+  // Last parsed offset (in char_t units from start of input data)
+  ptrdiff_t offset;
+
+  // Source document encoding
+  xml_encoding encoding;
+
+  // Default constructor, initializes object to failed state
+  xml_parse_result();
+
+  // Cast to bool operator (implicit conversion; presumably true only when status is status_ok - TODO confirm)
+  operator bool() const;
+
+  // Get error description (returned as a narrow const char* string, not char_t)
+  const char* description() const;
+};
+
+// Document class (DOM tree root)
+class PUGIXML_CLASS xml_document: public xml_node
+{
+private:
+  char_t* _buffer; // presumably the buffer owned by the document after a load (TODO confirm)
+
+  char _memory[192]; // fixed-size storage embedded in the object; its use is not visible in this header
+
+  // Non-copyable semantics: copy constructor and copy assignment are declared private
+  xml_document(const xml_document&);
+  const xml_document& operator=(const xml_document&);
+
+  void create();
+  void destroy();
+
+  xml_parse_result load_buffer_impl(void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own); // presumably the common back end of the public load_buffer* functions (TODO confirm)
+
+public:
+  // Default constructor, makes empty document
+  xml_document();
+
+  // Destructor, invalidates all node/attribute handles to this document
+  ~xml_document();
+
+  // Removes all nodes, leaving the empty document
+  void reset();
+
+  // Removes all nodes, then copies the entire contents of the specified document
+  void reset(const xml_document& proto);
+
+#ifndef PUGIXML_NO_STL
+  // Load document from stream. Note that the wchar_t stream overload takes no encoding argument.
+  xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+  xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default);
+#endif
+
+  // Load document from zero-terminated string. No encoding conversions are applied.
+  xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
+
+  // Load document from file (the path may be given as a narrow or as a wide string)
+  xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+  xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns.
+  xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+  // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed.
+  xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data).
+  // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore).
+  xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
+
+  // Save XML document to writer (semantics are slightly different from xml_node::print, see documentation for details).
+  void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+#ifndef PUGIXML_NO_STL
+  // Save XML document to stream (semantics are slightly different from xml_node::print, see documentation for details).
+  void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+  void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const;
+#endif
+
+  // Save XML to file; the bool result presumably reports success (TODO confirm)
+  bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+  bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const;
+
+  // Get document element
+  xml_node document_element() const;
+};
+
+#ifndef PUGIXML_NO_XPATH
+// XPath query return type (the result type of xpath_query::return_type() and xpath_variable::type())
+enum xpath_value_type {
+  xpath_type_none,	  // Unknown type (query failed to compile)
+  xpath_type_node_set,  // Node set (xpath_node_set)
+  xpath_type_number,	  // Number
+  xpath_type_string,	  // String
+  xpath_type_boolean	  // Boolean
+};
+
+// XPath parsing result
+struct PUGIXML_CLASS xpath_parse_result {
+  // Error message (0 if no error)
+  const char* error;
+
+  // Last parsed offset (in char_t units from string start)
+  ptrdiff_t offset;
+
+  // Default constructor, initializes object to failed state
+  xpath_parse_result();
+
+  // Cast to bool operator (implicit conversion; presumably true only when error is 0 - TODO confirm)
+  operator bool() const;
+
+  // Get error description (returned as a narrow const char* string, not char_t)
+  const char* description() const;
+};
+
+// A single XPath variable
+class PUGIXML_CLASS xpath_variable
+{
+  friend class xpath_variable_set;
+
+protected:
+  xpath_value_type _type;
+  xpath_variable* _next; // link to another variable; presumably chains variables inside xpath_variable_set (TODO confirm)
+
+  xpath_variable(); // protected: not constructible by client code; see xpath_variable_set::add
+
+  // Non-copyable semantics: copy constructor and copy assignment are not publicly accessible
+  xpath_variable(const xpath_variable&);
+  xpath_variable& operator=(const xpath_variable&);
+
+public:
+  // Get variable name
+  const char_t* name() const;
+
+  // Get variable type
+  xpath_value_type type() const;
+
+  // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error
+  bool get_boolean() const;
+  double get_number() const;
+  const char_t* get_string() const;
+  const xpath_node_set& get_node_set() const;
+
+  // Set variable value; no type conversion is performed, false is returned on type mismatch error
+  bool set(bool value);
+  bool set(double value);
+  bool set(const char_t* value);
+  bool set(const xpath_node_set& value);
+};
+
+// A set of XPath variables
+class PUGIXML_CLASS xpath_variable_set
+{
+private:
+  xpath_variable* _data[64]; // 64 variable pointers; presumably hash buckets chained through xpath_variable::_next (TODO confirm)
+
+  // Non-copyable semantics: copy constructor and copy assignment are declared private
+  xpath_variable_set(const xpath_variable_set&);
+  xpath_variable_set& operator=(const xpath_variable_set&);
+
+  xpath_variable* find(const char_t* name) const;
+
+public:
+  // Default constructor/destructor
+  xpath_variable_set();
+  ~xpath_variable_set();
+
+  // Add a new variable or get the existing one, if the types match
+  xpath_variable* add(const char_t* name, xpath_value_type type);
+
+  // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch
+  bool set(const char_t* name, bool value);
+  bool set(const char_t* name, double value);
+  bool set(const char_t* name, const char_t* value);
+  bool set(const char_t* name, const xpath_node_set& value);
+
+  // Get existing variable by name (const and non-const overloads)
+  xpath_variable* get(const char_t* name);
+  const xpath_variable* get(const char_t* name) const;
+};
+
+// A compiled XPath query object
+class PUGIXML_CLASS xpath_query
+{
+private:
+  void* _impl; // opaque pointer to the compiled query; its concrete type is not visible in this header
+  xpath_parse_result _result;
+
+  typedef void (*unspecified_bool_type)(xpath_query***); // result type of the safe bool conversion operator declared below
+
+  // Non-copyable semantics: copy constructor and copy assignment are declared private
+  xpath_query(const xpath_query&);
+  xpath_query& operator=(const xpath_query&);
+
+public:
+  // Construct a compiled object from XPath expression.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors.
+  explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0);
+
+  // Destructor
+  ~xpath_query();
+
+  // Get query expression return type
+  xpath_value_type return_type() const;
+
+  // Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  bool evaluate_boolean(const xpath_node& n) const;
+
+  // Evaluate expression as double value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  double evaluate_number(const xpath_node& n) const;
+
+#ifndef PUGIXML_NO_STL
+  // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  string_t evaluate_string(const xpath_node& n) const;
+#endif
+
+  // Evaluate expression as string value in the specified context; performs type conversion if necessary.
+  // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
+  // If PUGIXML_NO_EXCEPTIONS is defined, presumably yields an empty string instead (NOTE(review): this line used to say "empty set", apparently copied from evaluate_node_set - confirm).
+  size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const;
+
+  // Evaluate expression as node set in the specified context.
+  // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors.
+  // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead.
+  xpath_node_set evaluate_node_set(const xpath_node& n) const;
+
+  // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode)
+  const xpath_parse_result& result() const;
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+};
+
+#ifndef PUGIXML_NO_EXCEPTIONS
+// XPath exception class
+class PUGIXML_CLASS xpath_exception: public std::exception
+{
+private:
+  xpath_parse_result _result; // held by value, so the exception carries its own copy of the parse result
+
+public:
+  // Construct exception from parse result
+  explicit xpath_exception(const xpath_parse_result& result);
+
+  // Get error message (overrides std::exception::what; declared with an empty throw() specification)
+  virtual const char* what() const throw();
+
+  // Get parse result
+  const xpath_parse_result& result() const;
+};
+#endif
+
+// XPath node class (either xml_node or xml_attribute)
+class PUGIXML_CLASS xpath_node
+{
+private:
+  xml_node _node; // for an attribute XPath node this presumably holds the attribute's parent (see the two-argument constructor) - TODO confirm
+  xml_attribute _attribute;
+
+  typedef void (*unspecified_bool_type)(xpath_node***); // result type of the safe bool conversion operator declared below
+
+public:
+  // Default constructor; constructs empty XPath node
+  xpath_node();
+
+  // Construct XPath node from XML node/attribute (both constructors are non-explicit, so they allow implicit conversion)
+  xpath_node(const xml_node& node);
+  xpath_node(const xml_attribute& attribute, const xml_node& parent);
+
+  // Get node/attribute, if any
+  xml_node node() const;
+  xml_attribute attribute() const;
+
+  // Get parent of contained node/attribute
+  xml_node parent() const;
+
+  // Safe bool conversion operator
+  operator unspecified_bool_type() const;
+
+  // Borland C++ workaround
+  bool operator!() const;
+
+  // Comparison operators
+  bool operator==(const xpath_node& n) const;
+  bool operator!=(const xpath_node& n) const;
+};
+
+#ifdef __BORLANDC__
+// Borland C++ workaround: free operator&&/operator|| overloads for an xpath_node left operand (only declared when __BORLANDC__ is defined)
+bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs);
+bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs);
+#endif
+
+// A fixed-size collection of XPath nodes
+class PUGIXML_CLASS xpath_node_set
+{
+public:
+  // Collection type
+  enum type_t {
+    type_unsorted,			// Not ordered
+    type_sorted,			// Sorted by document order (ascending)
+    type_sorted_reverse		// Sorted by document order (descending)
+  };
+
+  // Constant iterator type (a plain pointer to const xpath_node)
+  typedef const xpath_node* const_iterator;
+
+  // Default constructor. Constructs empty set.
+  xpath_node_set();
+
+  // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful
+  xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted);
+
+  // Destructor
+  ~xpath_node_set();
+
+  // Copy constructor/assignment operator (public: unlike xpath_query and xpath_variable_set, this class is copyable)
+  xpath_node_set(const xpath_node_set& ns);
+  xpath_node_set& operator=(const xpath_node_set& ns);
+
+  // Get collection type
+  type_t type() const;
+
+  // Get collection size
+  size_t size() const;
+
+  // Indexing operator (read-only access)
+  const xpath_node& operator[](size_t index) const;
+
+  // Collection iterators
+  const_iterator begin() const;
+  const_iterator end() const;
+
+  // Sort the collection in ascending/descending order by document order
+  void sort(bool reverse = false);
+
+  // Get first node in the collection by document order
+  xpath_node first() const;
+
+  // Check if collection is empty
+  bool empty() const;
+
+private:
+  type_t _type;
+
+  xpath_node _storage; // one element embedded in the object; presumably lets small sets avoid a separate allocation (TODO confirm)
+
+  xpath_node* _begin;
+  xpath_node* _end;
+
+  void _assign(const_iterator begin, const_iterator end); // presumably the shared copy helper behind the range constructor and the copy operations (TODO confirm)
+};
+#endif
+
+#ifndef PUGIXML_NO_STL
+// Convert wide string to UTF8 (overloads for a raw wchar_t pointer and for a std::basic_string)
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
+std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
+
+// Convert UTF8 to wide string (overloads for a raw char pointer and for a std::basic_string)
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
+std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
+#endif
+
+// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
+typedef void* (*allocation_function)(size_t size);
+
+// Memory deallocation function interface
+typedef void (*deallocation_function)(void* ptr);
+
+// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
+void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
+
+// Get current memory management functions (one getter per function passed to set_memory_management_functions)
+allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
+deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
+}
+
+#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier); one _Iter_cat overload per pugixml iterator class
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&);
+std::forward_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
+namespace std
+{
+// Workarounds for (non-standard) iterator category detection when __SUNPRO_CC is defined; one __iterator_category overload per pugixml iterator class
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&);
+std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&);
+std::forward_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&);
+}
+#endif
+
+#endif
+
+/**
+ * Copyright (c) 2006-2012 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/mosesdecoder/phrase-extract/score-stsg/Jamfile b/mosesdecoder/phrase-extract/score-stsg/Jamfile
new file mode 100644
index 0000000000000000000000000000000000000000..6ae17b565158a726c9f58772da412672d87571b0
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/Jamfile
@@ -0,0 +1 @@
+exe score-stsg : [ glob *.cpp ] ..//syntax-common ..//deps ../..//boost_iostreams ../..//boost_program_options ../..//z : <include>.. ;
diff --git a/mosesdecoder/phrase-extract/score-stsg/LexicalTable.h b/mosesdecoder/phrase-extract/score-stsg/LexicalTable.h
new file mode 100644
index 0000000000000000000000000000000000000000..54bae1dec0f9d9c4362bea71045df36f11a3a15f
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/LexicalTable.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <istream>
+#include <string>
+#include <vector>
+
+#include <boost/unordered_map.hpp>
+
+#include "Vocabulary.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+class LexicalTable
+{
+public:
+  LexicalTable(Vocabulary &, Vocabulary &);
+  // Load the table from a stream (defined out of line).
+  void Load(std::istream &);
+  // Return the value stored for the pair (s, t), or 1.0 when the table has no such entry.
+  double PermissiveLookup(Vocabulary::IdType s, Vocabulary::IdType t) {
+    OuterMap::const_iterator row = m_table.find(s);
+    if (row != m_table.end()) {
+      InnerMap::const_iterator cell = row->second.find(t);
+      if (cell != row->second.end()) {
+        return cell->second;
+      }
+    }
+    return 1.0;
+  }
+private:
+  typedef boost::unordered_map<Vocabulary::IdType, double> InnerMap;
+  typedef boost::unordered_map<Vocabulary::IdType, InnerMap> OuterMap;
+  // The vocabularies are held by reference, so they must outlive this table.
+  Vocabulary &m_srcVocab;
+  Vocabulary &m_tgtVocab;
+  OuterMap m_table;  // two-level map: first key s, second key t
+};
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/RuleGroup.cpp b/mosesdecoder/phrase-extract/score-stsg/RuleGroup.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a4e6ff3a2145ac2babe1a7c3d2445a17bcf09724
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/RuleGroup.cpp
@@ -0,0 +1,47 @@
+#include "RuleGroup.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+void RuleGroup::SetNewSource(const StringPiece &source)
+{
+  m_totalCount = 0;  // a new source side starts with no rules and a zero total
+  m_distinctRules.clear();
+  source.CopyToString(&m_source);  // store a copy of the source text in m_source
+}
+
+void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
+                        const StringPiece &fullAlign, int count,
+                        double treeScore)
+{
+  // A rule is merged into the previous entry only if both target and ntAlign match it.
+  const bool startsNewRule = m_distinctRules.empty() ||
+                             ntAlign != m_distinctRules.back().ntAlign ||
+                             target != m_distinctRules.back().target;
+  if (startsNewRule) {
+    // Open a new distinct rule with a zero count; the counts are added below.
+    DistinctRule fresh;
+    target.CopyToString(&fresh.target);
+    ntAlign.CopyToString(&fresh.ntAlign);
+    fresh.count = 0;
+    fresh.treeScore = treeScore;  // only the first occurrence's tree score is kept
+    m_distinctRules.push_back(fresh);
+  }
+  DistinctRule &rule = m_distinctRules.back();
+  // Only the most recent alignment is compared, so equal alignments merge only when adjacent.
+  if (rule.alignments.empty() || rule.alignments.back().first != fullAlign) {
+    rule.alignments.resize(rule.alignments.size()+1);
+    fullAlign.CopyToString(&rule.alignments.back().first);
+  }
+  rule.alignments.back().second += count;
+  rule.count += count;
+  m_totalCount += count;
+}
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/RuleSymbol.h b/mosesdecoder/phrase-extract/score-stsg/RuleSymbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..e8cd9645888b9cdb080461e4314ef96f9e70c6cd
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/RuleSymbol.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "util/string_piece.hh"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+struct RuleSymbol {
+  StringPiece value;  // symbol text; being a StringPiece it presumably refers to storage owned elsewhere (TODO confirm lifetime)
+  bool isNonTerminal;  // true for a non-terminal symbol, false for a terminal
+};
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/RuleTableWriter.h b/mosesdecoder/phrase-extract/score-stsg/RuleTableWriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f7df99244789f634d12beebb3c0192097b61d19
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/RuleTableWriter.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <cmath>
+#include <string>
+
+#include "OutputFileStream.h"
+
+#include "Options.h"
+#include "TokenizedRuleHalf.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+class RuleTableWriter
+{
+public:
+  RuleTableWriter(const Options &options, Moses::OutputFileStream &out)
+    : m_options(options)
+    , m_out(out) {}
+  // Write one rule table line (defined out of line).
+  void WriteLine(const TokenizedRuleHalf &, const TokenizedRuleHalf &,
+                 const std::string &, double, double, int, int, int);
+
+private:
+  double MaybeLog(double a) const {
+    if (m_options.logProb) {
+      return m_options.negLogProb ? -log(a) : log(a);
+    }
+    return a;  // logProb is off: pass the value through unchanged
+  }
+
+  void WriteRuleHalf(const TokenizedRuleHalf &);
+  // Both members are references, so the options and the stream must outlive the writer.
+  const Options &m_options;
+  Moses::OutputFileStream &m_out;
+};
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.cpp b/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f6df0d0da80b86733df1c46d480d2995ccc4b758
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.cpp
@@ -0,0 +1,431 @@
+#include "ScoreStsg.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include "util/string_piece.hh"
+#include "util/string_piece_hash.hh"
+#include "util/tokenize_piece.hh"
+
+#include "InputFileStream.h"
+#include "OutputFileStream.h"
+
+#include "syntax-common/exception.h"
+
+#include "LexicalTable.h"
+#include "Options.h"
+#include "RuleGroup.h"
+#include "RuleTableWriter.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+const int ScoreStsg::kCountOfCountsMax = 10;  // largest count that gets its own count-of-counts slot
+// m_countOfCounts is indexed directly by count (up to and including kCountOfCountsMax), so it needs kCountOfCountsMax+1 elements.
+ScoreStsg::ScoreStsg()
+  : Tool("score-stsg")
+  , m_lexTable(m_srcVocab, m_tgtVocab)
+  , m_countOfCounts(kCountOfCountsMax+1, 0)
+  , m_totalDistinct(0)
+{
+}
+
+int ScoreStsg::Main(int argc, char *argv[])
+{
+  // Tool entry point; returns 0 whenever it returns normally. Process command-line options.
+  ProcessOptions(argc, argv, m_options);
+
+  // Open input files.
+  Moses::InputFileStream extractStream(m_options.extractFile);
+  Moses::InputFileStream lexStream(m_options.lexFile);
+
+  // Open output files.
+  Moses::OutputFileStream outStream;
+  Moses::OutputFileStream countOfCountsStream;
+  OpenOutputFileOrDie(m_options.tableFile, outStream);
+  if (m_options.goodTuring || m_options.kneserNey) {
+    OpenOutputFileOrDie(m_options.tableFile+".coc", countOfCountsStream);  // count-of-counts file, only opened when smoothing is requested
+  }
+
+  // Load lexical table.
+  if (!m_options.noLex) {
+    m_lexTable.Load(lexStream);
+  }
+
+  const util::MultiCharacter delimiter("|||");
+  std::size_t lineNum = 0;  // 1-based number of the line most recently read
+  std::size_t startLine= 0;  // line number at which the current rule group started
+  std::string line;
+  std::string tmp;  // scratch buffer for the numeric fields
+  RuleGroup ruleGroup;
+  RuleTableWriter ruleTableWriter(m_options, outStream);
+
+  while (std::getline(extractStream, line)) {
+    ++lineNum;
+    // Fields, separated by "|||": source, target, ntAlign, fullAlign, count and, when requested below, a tree score.
+    // Tokenize the input line. NOTE(review): fields are read without checking that they exist - confirm TokenIter behaviour on short lines.
+    util::TokenIter<util::MultiCharacter> it(line, delimiter);
+    StringPiece source = *it++;
+    StringPiece target = *it++;
+    StringPiece ntAlign = *it++;
+    StringPiece fullAlign = *it++;
+    it->CopyToString(&tmp);
+    int count = std::atoi(tmp.c_str());
+    double treeScore = 0.0f;
+    if (m_options.treeScore && !m_options.inverse) {
+      ++it;  // the tree score is the field following the count
+      it->CopyToString(&tmp);
+      treeScore = std::atof(tmp.c_str());
+    }
+
+    // If this is the first line or if source has changed since the last
+    // line then process the current rule group and start a new one.
+    if (source != ruleGroup.GetSource()) {
+      if (lineNum > 1) {  // on the first line there is no previous group to flush
+        ProcessRuleGroupOrDie(ruleGroup, ruleTableWriter, startLine, lineNum-1);
+      }
+      startLine = lineNum;
+      ruleGroup.SetNewSource(source);
+    }
+
+    // Add the rule to the current rule group.
+    ruleGroup.AddRule(target, ntAlign, fullAlign, count, treeScore);
+  }
+
+  // Process the final rule group (done unconditionally, i.e. also when no line was read).
+  ProcessRuleGroupOrDie(ruleGroup, ruleTableWriter, startLine, lineNum);
+
+  // Write count of counts file.
+  if (m_options.goodTuring || m_options.kneserNey) {
+    // Kneser-Ney needs the total number of distinct rules.
+    countOfCountsStream << m_totalDistinct << std::endl;
+    // Write out counts of counts.
+    for (int i = 1; i <= kCountOfCountsMax; ++i) {  // NOTE(review): reads index kCountOfCountsMax, so m_countOfCounts needs kCountOfCountsMax+1 elements - confirm
+      countOfCountsStream << m_countOfCounts[i] << std::endl;
+    }
+  }
+
+  return 0;
+}
+
+void ScoreStsg::TokenizeRuleHalf(const std::string &s, TokenizedRuleHalf &half)
+{
+  // Populate half from s. First copy s to half.string, stripping leading/trailing spaces and tabs; throws Exception if nothing remains.
+  std::size_t start = s.find_first_not_of(" \t");
+  if (start == std::string::npos) {
+    throw Exception("rule half is empty");
+  }
+  std::size_t end = s.find_last_not_of(" \t");
+  assert(end != std::string::npos);
+  half.string = s.substr(start, end-start+1);
+
+  // Tokenize half.string.
+  half.tokens.clear();
+  for (TreeFragmentTokenizer p(half.string);
+       p != TreeFragmentTokenizer(); ++p) {
+    half.tokens.push_back(*p);
+  }
+
+  // Extract the frontier symbols.
+  half.frontierSymbols.clear();
+  const std::size_t numTokens = half.tokens.size();
+  for (std::size_t i = 0; i < numTokens; ++i) {  // index type matches numTokens (no signed/unsigned comparison)
+    if (half.tokens[i].type != TreeFragmentToken_WORD) {
+      continue;
+    }
+    if (i == 0 || half.tokens[i-1].type != TreeFragmentToken_LSB) {
+      // A word is a terminal iff it doesn't follow '['
+      half.frontierSymbols.resize(half.frontierSymbols.size()+1);
+      half.frontierSymbols.back().value = half.tokens[i].value;
+      half.frontierSymbols.back().isNonTerminal = false;
+    } else if (i+1 < numTokens &&
+               half.tokens[i+1].type == TreeFragmentToken_RSB) {
+      // A word is a non-terminal iff it follows '[' and is succeeded by ']'
+      half.frontierSymbols.resize(half.frontierSymbols.size()+1);
+      half.frontierSymbols.back().value = half.tokens[i].value;
+      half.frontierSymbols.back().isNonTerminal = true;
+      ++i;  // Skip over the ']'
+    }  // a word that follows '[' but is not followed by ']' is not a frontier symbol
+  }
+}
+
+void ScoreStsg::ProcessRuleGroupOrDie(const RuleGroup &group,
+                                      RuleTableWriter &writer,
+                                      std::size_t start,
+                                      std::size_t end)
+{
+  try {  // any failure is passed to Error() together with the input line range start-end
+    ProcessRuleGroup(group, writer);
+  } catch (const Exception &e) {
+    std::ostringstream report;
+    report << "failed to process rule group at lines " << start << "-" << end;
+    report << ": " << e.msg();
+    Error(report.str());
+  } catch (const std::exception &e) {
+    std::ostringstream report;
+    report << "failed to process rule group at lines " << start << "-" << end;
+    report << ": " << e.what();
+    Error(report.str());
+  }
+}
+
+void ScoreStsg::ProcessRuleGroup(const RuleGroup &group,
+                                 RuleTableWriter &writer)
+{
+  // Write one rule table line per distinct rule of group; also updates the count-of-counts statistics when smoothing is on.
+  const std::size_t totalCount = group.GetTotalCount();
+  const std::size_t distinctCount = group.GetSize();
+  TokenizeRuleHalf(group.GetSource(), m_sourceHalf);
+
+  const bool fullyLexical = m_sourceHalf.IsFullyLexical();
+
+  // Process each distinct rule in turn.
+  for (RuleGroup::ConstIterator p = group.Begin(); p != group.End(); ++p) {
+    const RuleGroup::DistinctRule &rule = *p;
+    // Update count of count statistics. This precedes the threshold test below, so
+    // rules discarded there are still counted. countInt is rule.count + 0.99999 truncated to int.
+    if (m_options.goodTuring || m_options.kneserNey) {
+      ++m_totalDistinct;
+      int countInt = rule.count + 0.99999;
+      if (countInt >= 0 && countInt <= kCountOfCountsMax) {  // a negative count (malformed input) must never be used as an index
+        ++m_countOfCounts[countInt];
+      }
+    }
+
+    // If the rule is not fully lexical then discard it if the count is below
+    // the threshold value.
+    if (!fullyLexical && rule.count < m_options.minCountHierarchical) {
+      continue;
+    }
+
+    TokenizeRuleHalf(rule.target, m_targetHalf);
+
+    // Find the most frequent alignment (if there's a tie, take the first one).
+    std::vector<std::pair<std::string, int> >::const_iterator q =
+      rule.alignments.begin();
+    const std::pair<std::string, int> *bestAlignmentAndCount = &(*q++);
+    for (; q != rule.alignments.end(); ++q) {
+      if (q->second > bestAlignmentAndCount->second) {  // strict '>' keeps the earliest alignment on ties
+        bestAlignmentAndCount = &(*q);
+      }
+    }
+    const std::string &bestAlignment = bestAlignmentAndCount->first;
+    ParseAlignmentString(bestAlignment, m_targetHalf.frontierSymbols.size(),
+                         m_tgtToSrc);
+
+    // Compute the lexical translation probability.
+    double lexProb = ComputeLexProb(m_sourceHalf.frontierSymbols,
+                                    m_targetHalf.frontierSymbols, m_tgtToSrc);
+
+    // Write a line to the rule table.
+    writer.WriteLine(m_sourceHalf, m_targetHalf, bestAlignment, lexProb,
+                     rule.treeScore, rule.count, totalCount, distinctCount);
+  }
+}
+
+// Fill tgtToSrc[t] with the source indices of every "s-t" pair found in s.
+// Throws Exception if a target index is missing or outside [0, numTgtWords).
+void ScoreStsg::ParseAlignmentString(const std::string &s, int numTgtWords,
+                                     ALIGNMENT &tgtToSrc)
+{
+  tgtToSrc.clear();
+  tgtToSrc.resize(numTgtWords);
+  const std::string digits = "0123456789";
+  std::string::size_type begin = 0;
+  while (true) {
+    std::string::size_type end = s.find("-", begin);
+    if (end == std::string::npos) {
+      return;
+    }
+    int src = std::atoi(s.substr(begin, end-begin).c_str());
+    if (end+1 == s.size()) {
+      throw Exception("Target index missing");
+    }
+    begin = end+1;
+    end = s.find_first_not_of(digits, begin+1);
+    // substr() clamps the length, so end == npos simply takes the rest of s.
+    int tgt = std::atoi(s.substr(begin, end-begin).c_str());
+    if (tgt < 0 || tgt >= numTgtWords) {
+      throw Exception("Target index out of range");
+    }
+    tgtToSrc[tgt].insert(src);
+    if (end == std::string::npos) {
+      return;
+    }
+    begin = end+1;
+  }
+}
+
+// Product over the target terminals of the mean PermissiveLookup() score taken
+// over the aligned source symbols (or against NULL when nothing is aligned).
+double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
+                                 const std::vector<RuleSymbol> &targetFrontier,
+                                 const ALIGNMENT &tgtToSrc)
+{
+  typedef std::set<std::size_t>::const_iterator SrcIter;
+  double product = 1.0;
+  for (std::size_t t = 0; t < targetFrontier.size(); ++t) {
+    if (targetFrontier[t].isNonTerminal) {
+      continue;
+    }
+    Vocabulary::IdType tgtId = m_tgtVocab.Lookup(targetFrontier[t].value,
+                               StringPieceCompatibleHash(),
+                               StringPieceCompatibleEquals());
+    const std::set<std::size_t> &aligned = tgtToSrc[t];
+    if (aligned.empty()) {  // explain an unaligned word by NULL
+      product *= m_lexTable.PermissiveLookup(Vocabulary::NullId(), tgtId);
+      continue;
+    }
+    double sum = 0.0;
+    for (SrcIter s = aligned.begin(); s != aligned.end(); ++s) {
+      Vocabulary::IdType srcId = m_srcVocab.Lookup(sourceFrontier[*s].value,
+                                 StringPieceCompatibleHash(),
+                                 StringPieceCompatibleEquals());
+      sum += m_lexTable.PermissiveLookup(srcId, tgtId);
+    }
+    product *= sum / static_cast<double>(aligned.size());
+  }
+  return product;
+}
+
+void ScoreStsg::ProcessOptions(int argc, char *argv[], Options &options) const
+{
+  namespace po = boost::program_options;
+  namespace cls = boost::program_options::command_line_style;
+
+  // Construct the 'top' of the usage message: the bit that comes before the
+  // options list.
+  std::ostringstream usageTop;
+  usageTop << "Usage: " << name()
+           << " [OPTION]... EXTRACT LEX TABLE\n\n"
+           << "STSG rule scorer\n\n"
+           << "Options";
+
+  // Construct the 'bottom' of the usage message.
+  std::ostringstream usageBottom;
+  usageBottom << "TODO";
+
+  // Declare the command line options that are visible to the user.
+  po::options_description visible(usageTop.str());
+  visible.add_options()
+  ("GoodTuring",
+   "apply Good-Turing smoothing to relative frequency probability estimates")
+  ("Hierarchical",
+   "ignored (included for compatibility with score)")
+  ("Inverse",
+   "use inverse mode")
+  ("KneserNey",
+   "apply Kneser-Ney smoothing to relative frequency probability estimates")
+  ("LogProb",
+   "output log probabilities")
+  ("MinCountHierarchical",
+   po::value(&options.minCountHierarchical)->
+   default_value(options.minCountHierarchical),
+   "filter out rules with frequency < arg (except fully lexical rules)")
+  ("NegLogProb",
+   "output negative log probabilities")
+  ("NoLex",
+   "do not compute lexical translation score")
+  ("NoWordAlignment",
+   "do not output word alignments")
+  ("PCFG",
+   "synonym for TreeScore (included for compatibility with score)")
+  ("TreeScore",
+   "include pre-computed tree score from extract")
+  ("UnpairedExtractFormat",
+   "ignored (included for compatibility with score)")
+  ;
+
+  // Declare the command line options that are hidden from the user
+  // (these are used as positional options).
+  po::options_description hidden("Hidden options");
+  hidden.add_options()
+  ("ExtractFile",
+   po::value(&options.extractFile),
+   "extract file")
+  ("LexFile",
+   po::value(&options.lexFile),
+   "lexical probability file")
+  ("TableFile",
+   po::value(&options.tableFile),
+   "output file")
+  ;
+
+  // Compose the full set of command-line options.
+  po::options_description cmdLineOptions;
+  cmdLineOptions.add(visible).add(hidden);
+
+  // Register the positional options.
+  po::positional_options_description p;
+  p.add("ExtractFile", 1);
+  p.add("LexFile", 1);
+  p.add("TableFile", 1);
+
+  // Process the command-line.
+  po::variables_map vm;
+  try {
+    po::store(po::command_line_parser(argc, argv).style(MosesOptionStyle()).
+              options(cmdLineOptions).positional(p).run(), vm);
+    po::notify(vm);
+  } catch (const std::exception &e) {
+    std::ostringstream msg;
+    msg << e.what() << "\n\n" << visible << usageBottom.str();
+    Error(msg.str());
+  }
+
+  if (vm.count("help")) {
+    std::cout << visible << usageBottom.str() << std::endl;
+    std::exit(0);
+  }
+
+  // Check all positional options were given.
+  if (!vm.count("ExtractFile") ||
+      !vm.count("LexFile") ||
+      !vm.count("TableFile")) {
+    std::ostringstream msg;
+    std::cerr << visible << usageBottom.str() << std::endl;
+    std::exit(1);
+  }
+
+  // Process Boolean options.
+  if (vm.count("GoodTuring")) {
+    options.goodTuring = true;
+  }
+  if (vm.count("Inverse")) {
+    options.inverse = true;
+  }
+  if (vm.count("KneserNey")) {
+    options.kneserNey = true;
+  }
+  if (vm.count("LogProb")) {
+    options.logProb = true;
+  }
+  if (vm.count("NegLogProb")) {
+    options.negLogProb = true;
+  }
+  if (vm.count("NoLex")) {
+    options.noLex = true;
+  }
+  if (vm.count("NoWordAlignment")) {
+    options.noWordAlignment = true;
+  }
+  if (vm.count("TreeScore") || vm.count("PCFG")) {
+    options.treeScore = true;
+  }
+}
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.h b/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.h
new file mode 100644
index 0000000000000000000000000000000000000000..1757e181bca1cf4c711b7b3641f7ec583e7299da
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/ScoreStsg.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <map>
+#include <ostream>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "ExtractionPhrasePair.h"
+#include "OutputFileStream.h"
+
+#include "syntax-common/tool.h"
+
+#include "LexicalTable.h"
+#include "Options.h"
+#include "RuleSymbol.h"
+#include "TokenizedRuleHalf.h"
+#include "Vocabulary.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+class RuleGroup;
+class RuleTableWriter;
+
+class ScoreStsg : public Tool  // the STSG rule scorer tool
+{
+public:
+  ScoreStsg();
+  // Runs the tool on a command line (presumably overrides Tool::Main).
+  virtual int Main(int argc, char *argv[]);
+
+private:
+  static const int kCountOfCountsMax;  // largest index counted in m_countOfCounts
+  // Lexical score of a target frontier given a source frontier and alignment.
+  double ComputeLexProb(const std::vector<RuleSymbol> &,
+                        const std::vector<RuleSymbol> &,
+                        const ALIGNMENT &);
+  // Fills the ALIGNMENT (target index -> source indices) from "s-t" pairs.
+  void ParseAlignmentString(const std::string &, int,
+                            ALIGNMENT &);
+  // Parses the command line into an Options object.
+  void ProcessOptions(int, char *[], Options &) const;
+  // Scores the rules of one group and writes them via the RuleTableWriter.
+  void ProcessRuleGroup(const RuleGroup &, RuleTableWriter &);
+  // As ProcessRuleGroup, but failures are reported (with lines) via Error().
+  void ProcessRuleGroupOrDie(const RuleGroup &, RuleTableWriter &,
+                             std::size_t, std::size_t);
+  // Tokenizes one rule half (given as a string) into a TokenizedRuleHalf.
+  void TokenizeRuleHalf(const std::string &, TokenizedRuleHalf &);
+
+  Options m_options;
+  Vocabulary m_srcVocab;
+  Vocabulary m_tgtVocab;
+  LexicalTable m_lexTable;
+  std::vector<int> m_countOfCounts;  // indexed by rounded-up rule count
+  int m_totalDistinct;  // rules seen while GoodTuring/KneserNey is enabled
+  TokenizedRuleHalf m_sourceHalf;  // scratch: source side of the current group
+  TokenizedRuleHalf m_targetHalf;  // scratch: target side of the current rule
+  ALIGNMENT m_tgtToSrc;  // scratch: best alignment of the current rule
+};
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/TokenizedRuleHalf.cpp b/mosesdecoder/phrase-extract/score-stsg/TokenizedRuleHalf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6ccc2a311dd3262d2d22318428947bf76b309e29
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/TokenizedRuleHalf.cpp
@@ -0,0 +1,40 @@
+#include "TokenizedRuleHalf.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+bool TokenizedRuleHalf::IsFullyLexical() const
+{
+  // Fully lexical means no frontier symbol is a non-terminal: scan forward
+  // to the first non-terminal (if any) and report whether the end was reached.
+  std::vector<RuleSymbol>::const_iterator it = frontierSymbols.begin();
+  while (it != frontierSymbols.end() && !it->isNonTerminal) {
+    ++it;
+  }
+  return it == frontierSymbols.end();
+}
+
+bool TokenizedRuleHalf::IsString() const
+{
+  // A rule half is either a string (like "[X] and [X]") or a tree (like
+  // "[NP [NP] [CC and] [NP]]").  A tree starts with '[' followed by a word and
+  // then either another word or another '['.
+  if (tokens[0].type == TreeFragmentToken_WORD) {
+    return true;  // a string may start with a terminal
+  }
+  // Otherwise a string starts with a bracketed non-terminal: '[' word ']'.
+  return tokens[2].type == TreeFragmentToken_RSB;
+}
+
+bool TokenizedRuleHalf::IsTree() const
+{
+  return !IsString();  // defined purely as the complement of IsString()
+}
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/phrase-extract/score-stsg/Vocabulary.h b/mosesdecoder/phrase-extract/score-stsg/Vocabulary.h
new file mode 100644
index 0000000000000000000000000000000000000000..6370544f47f605b1dcf4a1bf74f95ce9351664bf
--- /dev/null
+++ b/mosesdecoder/phrase-extract/score-stsg/Vocabulary.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <string>
+
+#include "syntax-common/numbered_set.h"
+
+namespace MosesTraining
+{
+namespace Syntax
+{
+namespace ScoreStsg
+{
+
+typedef NumberedSet<std::string, std::size_t> Vocabulary;  // strings numbered with std::size_t (presumably ids)
+
+}  // namespace ScoreStsg
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/mosesdecoder/run-regtests.sh b/mosesdecoder/run-regtests.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f2c02aaa807d9014dfc97b36aaed14331350e2ce
--- /dev/null
+++ b/mosesdecoder/run-regtests.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# this script assumes that all 3rd-party dependencies are installed under ./opt
+# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
+
+set -e -o pipefail
+
+opt=$(pwd)/opt
+
+# normalize the command line; if getopt fails, 'set -e' aborts the script here
+args=$(getopt -oj:aq -lwith-irstlm:,with-boost:,with-cmph:,with-regtest:,no-xmlrpc-c,with-xmlrpc-c:,full -- "$@")
+eval set -- "$args"
+
+# default settings
+full=false
+j=$(getconf _NPROCESSORS_ONLN)
+irstlm=$opt/irstlm-5.80.08
+boost=$opt
+cmph=$opt
+xmlrpc="--with-xmlrpc-c=$opt"
+regtest=$(pwd)/regtest
+unset q
+unset a
+# the regression test for the compactpt bug is currently known to fail,
+# let's skip it for the time being
+skipcompact=--regtest-skip-compactpt
+
+# overrides from command line
+while true ; do
+    case "$1" in
+        -j ) j=$2; shift 2 ;;
+        -a ) a=-a; shift ;;
+        -q ) q=-q; shift ;;
+        --no-xmlrpc-c   ) xmlrpc=$1;     shift ;;
+        --with-xmlrpc-c ) xmlrpc="--with-xmlrpc-c=$2"; shift 2 ;;
+        --with-irstlm   ) irstlm=$2;     shift 2 ;;
+        --with-boost    ) boost=$2;      shift 2 ;;
+        --with-cmph     ) cmph=$2;       shift 2 ;;
+        --with-regtest  ) regtest=$2;    shift 2 ;;
+        # --full takes no argument, so only the option itself is consumed
+        --full          ) full=true;     shift ;;
+        -- ) shift; break ;;
+        * ) break ;;
+    esac
+done
+
+# make sure the regression test data (a git submodule) is checked out
+git submodule init
+git submodule update regtest
+
+# full test means
+# -- compile from scratch without server, run regtests
+# -- compile from scratch with server, run regtests
+# ("$@" and path options are quoted so that they reach bjam unsplit)
+set -x
+if [ "$full" == true ] ; then
+    ./bjam -j"$j" --with-mm --with-mm-extras --with-irstlm="$irstlm" --with-boost="$boost" --with-cmph="$cmph" --no-xmlrpc-c --with-regtest="$regtest" -a $skipcompact "$@" $q || exit $?
+    if ./regression-testing/run-single-test.perl --server --startuptest  ; then
+        ./bjam -j"$j" --with-mm --with-mm-extras --with-irstlm="$irstlm" --with-boost="$boost" --with-cmph="$cmph" "$xmlrpc" --with-regtest="$regtest" -a $skipcompact "$@" $q
+    fi
+else
+   # when investigating failures, always run single-threaded
+   if [ "$q" == "-q" ] ; then j=1; fi
+
+   if ./regression-testing/run-single-test.perl --server --startuptest  ; then
+       ./bjam -j"$j" --with-mm $q $a --with-irstlm="$irstlm" --with-boost="$boost" --with-cmph="$cmph" "$xmlrpc" --with-regtest="$regtest" $skipcompact "$@"
+   else
+       ./bjam -j"$j" --with-mm --with-mm-extras $q $a --with-irstlm="$irstlm" --with-boost="$boost" --with-cmph="$cmph" --no-xmlrpc-c --with-regtest="$regtest" $skipcompact "$@"
+   fi
+fi
+
+# if [ "$RECOMPILE" == "NO" ] ; then
+#   RECOMPILE=
+# else
+#   RECOMPILE="-a"
+# fi
+
+# # test compilation without xmlrpc-c
+# # ./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --no-xmlrpc-c --with-regtest=$(pwd)/regtest -a -q $@ || exit $?
+
+# # test compilation with xmlrpc-c
+# if ./regression-testing/run-single-test.perl --server --startuptest  ; then
+#   ./bjam -j$(nproc) --with-irstlm=$opt --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-regtest=$(pwd)/regtest $RECOMPILE -q --regtest-skip-compactpt $@
+# fi