Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- fairseq-0.10.2/fairseq_cli/preprocess.py +398 -0
- fairseq-0.10.2/fairseq_cli/score.py +96 -0
- fairseq-0.10.2/tests/test_backtranslation_dataset.py +123 -0
- fairseq-0.10.2/tests/test_constraints.py +269 -0
- fairseq-0.10.2/tests/test_convtbc.py +54 -0
- fairseq-0.10.2/tests/test_inference_dropout.py +66 -0
- fairseq-0.10.2/tests/test_metrics.py +77 -0
- fairseq-0.10.2/tests/test_sequence_scorer.py +120 -0
- fairseq-0.10.2/tests/test_token_block_dataset.py +79 -0
- mosesdecoder/.beautify-ignore +38 -0
- mosesdecoder/.travis.yml +24 -0
- mosesdecoder/azure-pipelines.yml +100 -0
- mosesdecoder/biconcor/Jamfile +2 -0
- mosesdecoder/biconcor/PhrasePairCollection.cpp +209 -0
- mosesdecoder/biconcor/PhrasePairCollection.h +46 -0
- mosesdecoder/biconcor/SuffixArray.h +82 -0
- mosesdecoder/biconcor/biconcor.cpp +171 -0
- mosesdecoder/chk.tmp +1 -0
- mosesdecoder/doxygen.conf +1781 -0
- mosesdecoder/moses-cmd/Jamfile +7 -0
- mosesdecoder/moses-cmd/LatticeMBRGrid.cpp +215 -0
- mosesdecoder/moses-cmd/Main.cpp +33 -0
- mosesdecoder/moses-cmd/MainVW.cpp +186 -0
- mosesdecoder/moses-cmd/MainVW.h +42 -0
- mosesdecoder/moses2/AlignmentInfo.h +148 -0
- mosesdecoder/moses2/AlignmentInfoCollection.cpp +62 -0
- mosesdecoder/moses2/ArcLists.cpp +127 -0
- mosesdecoder/moses2/Array.h +85 -0
- mosesdecoder/moses2/EstimatedScores.h +59 -0
- mosesdecoder/moses2/HypothesisBase.cpp +81 -0
- mosesdecoder/moses2/HypothesisBase.h +76 -0
- mosesdecoder/moses2/HypothesisColl.cpp +286 -0
- mosesdecoder/moses2/HypothesisColl.h +75 -0
- mosesdecoder/moses2/InputPathsBase.h +54 -0
- mosesdecoder/moses2/Main.cpp +116 -0
- mosesdecoder/moses2/Main.h +23 -0
- mosesdecoder/moses2/ManagerBase.cpp +53 -0
- mosesdecoder/moses2/ManagerBase.h +81 -0
- mosesdecoder/moses2/MemPool.h +77 -0
- mosesdecoder/moses2/MemPoolAllocator.h +85 -0
- mosesdecoder/moses2/Moses2Wrapper.h +30 -0
- mosesdecoder/moses2/Phrase.h +146 -0
- mosesdecoder/moses2/Recycler.cpp +13 -0
- mosesdecoder/moses2/Scores.h +81 -0
- mosesdecoder/moses2/SubPhrase.h +54 -0
- mosesdecoder/moses2/TargetPhrase.cpp +15 -0
- mosesdecoder/moses2/TranslationTask.cpp +65 -0
- mosesdecoder/moses2/TrellisPaths.h +64 -0
- mosesdecoder/moses2/TypeDef.h +125 -0
- mosesdecoder/moses2/Vector.cpp +14 -0
fairseq-0.10.2/fairseq_cli/preprocess.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the MIT license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
"""
|
| 7 |
+
Data pre-processing: build vocabularies and binarize training data.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import os
|
| 12 |
+
import shutil
|
| 13 |
+
import sys
|
| 14 |
+
from collections import Counter
|
| 15 |
+
from itertools import zip_longest
|
| 16 |
+
from multiprocessing import Pool
|
| 17 |
+
|
| 18 |
+
from fairseq import options, tasks, utils
|
| 19 |
+
from fairseq.binarizer import Binarizer
|
| 20 |
+
from fairseq.data import indexed_dataset
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logging.basicConfig(
|
| 24 |
+
format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
|
| 25 |
+
datefmt="%Y-%m-%d %H:%M:%S",
|
| 26 |
+
level=os.environ.get("LOGLEVEL", "INFO").upper(),
|
| 27 |
+
stream=sys.stdout,
|
| 28 |
+
)
|
| 29 |
+
logger = logging.getLogger("fairseq_cli.preprocess")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main(args):
    """Build vocabularies and binarize train/valid/test data.

    Reads plain-text parallel data named ``<pref>.<lang>``, writes
    dictionaries, binarized datasets, and optional alignment files into
    ``args.destdir``. Raises FileExistsError if a dictionary already
    exists at the destination and none was supplied explicitly.
    """
    utils.import_user_module(args)

    os.makedirs(args.destdir, exist_ok=True)

    # Mirror all log output into a file inside the destination directory.
    logger.addHandler(
        logging.FileHandler(
            filename=os.path.join(args.destdir, "preprocess.log"),
        )
    )
    logger.info(args)

    task = tasks.get_task(args.task)

    # --- small path helpers, closed over ``args`` -------------------------

    def train_path(lang):
        # e.g. trainpref="data/train", lang="de" -> "data/train.de"
        return "{}{}".format(args.trainpref, ("." + lang) if lang else "")

    def file_name(prefix, lang):
        fname = prefix
        if lang is not None:
            fname += ".{lang}".format(lang=lang)
        return fname

    def dest_path(prefix, lang):
        return os.path.join(args.destdir, file_name(prefix, lang))

    def dict_path(lang):
        return dest_path("dict", lang) + ".txt"

    def build_dictionary(filenames, src=False, tgt=False):
        # Exactly one of src/tgt must be set; it selects which threshold
        # and vocabulary-size limits to apply.
        assert src ^ tgt
        return task.build_dictionary(
            filenames,
            workers=args.workers,
            threshold=args.thresholdsrc if src else args.thresholdtgt,
            nwords=args.nwordssrc if src else args.nwordstgt,
            padding_factor=args.padding_factor,
        )

    # With --only-source no target-side processing is done at all.
    target = not args.only_source

    # Refuse to clobber dictionaries that already exist at the destination
    # unless the user explicitly provided them.
    if not args.srcdict and os.path.exists(dict_path(args.source_lang)):
        raise FileExistsError(dict_path(args.source_lang))
    if target and not args.tgtdict and os.path.exists(dict_path(args.target_lang)):
        raise FileExistsError(dict_path(args.target_lang))

    # --- load or build the dictionaries -----------------------------------

    if args.joined_dictionary:
        assert (
            not args.srcdict or not args.tgtdict
        ), "cannot use both --srcdict and --tgtdict with --joined-dictionary"

        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        elif args.tgtdict:
            src_dict = task.load_dictionary(args.tgtdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            # One vocabulary built over both language sides (set de-dupes
            # the filenames when source and target prefixes coincide).
            src_dict = build_dictionary(
                {train_path(lang) for lang in [args.source_lang, args.target_lang]},
                src=True,
            )
        tgt_dict = src_dict
    else:
        if args.srcdict:
            src_dict = task.load_dictionary(args.srcdict)
        else:
            assert (
                args.trainpref
            ), "--trainpref must be set if --srcdict is not specified"
            src_dict = build_dictionary([train_path(args.source_lang)], src=True)

        if target:
            if args.tgtdict:
                tgt_dict = task.load_dictionary(args.tgtdict)
            else:
                assert (
                    args.trainpref
                ), "--trainpref must be set if --tgtdict is not specified"
                tgt_dict = build_dictionary([train_path(args.target_lang)], tgt=True)
        else:
            tgt_dict = None

    src_dict.save(dict_path(args.source_lang))
    if target and tgt_dict is not None:
        tgt_dict.save(dict_path(args.target_lang))

    # --- binarization helpers ---------------------------------------------

    def make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers):
        """Binarize one text file, fanning out to worker processes.

        Worker i handles byte range [offsets[i], offsets[i+1]) and writes
        a temp shard; the parent handles the first chunk itself, then
        merges all shards into a single .bin/.idx pair.
        """
        logger.info("[{}] Dictionary: {} types".format(lang, len(vocab)))
        # Mutable accumulators shared with the merge callback:
        # n_seq_tok = [num sentences, num tokens].
        n_seq_tok = [0, 0]
        replaced = Counter()

        def merge_result(worker_result):
            replaced.update(worker_result["replaced"])
            n_seq_tok[0] += worker_result["nseq"]
            n_seq_tok[1] += worker_result["ntok"]

        input_file = "{}{}".format(
            input_prefix, ("." + lang) if lang is not None else ""
        )
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            # The parent process acts as worker 0, so spawn one fewer.
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize,
                    (
                        args,
                        input_file,
                        vocab,
                        prefix,
                        lang,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, lang, "bin"),
            impl=args.dataset_impl,
            vocab_size=len(vocab),
        )
        # Parent binarizes the first chunk in-process.
        merge_result(
            Binarizer.binarize(
                input_file, vocab, lambda t: ds.add_item(t), offset=0, end=offsets[1]
            )
        )
        if num_workers > 1:
            pool.join()
            # Append each worker's shard in order, then delete the temp files.
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, lang)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))

        logger.info(
            "[{}] {}: {} sents, {} tokens, {:.3}% replaced by {}".format(
                lang,
                input_file,
                n_seq_tok[0],
                n_seq_tok[1],
                100 * sum(replaced.values()) / n_seq_tok[1],
                vocab.unk_word,
            )
        )

    def make_binary_alignment_dataset(input_prefix, output_prefix, num_workers):
        """Binarize a word-alignment file; same fan-out/merge scheme as above."""
        nseq = [0]

        def merge_result(worker_result):
            nseq[0] += worker_result["nseq"]

        input_file = input_prefix
        offsets = Binarizer.find_offsets(input_file, num_workers)
        pool = None
        if num_workers > 1:
            pool = Pool(processes=num_workers - 1)
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                pool.apply_async(
                    binarize_alignments,
                    (
                        args,
                        input_file,
                        utils.parse_alignment,
                        prefix,
                        offsets[worker_id],
                        offsets[worker_id + 1],
                    ),
                    callback=merge_result,
                )
            pool.close()

        ds = indexed_dataset.make_builder(
            dataset_dest_file(args, output_prefix, None, "bin"), impl=args.dataset_impl
        )

        merge_result(
            Binarizer.binarize_alignments(
                input_file,
                utils.parse_alignment,
                lambda t: ds.add_item(t),
                offset=0,
                end=offsets[1],
            )
        )
        if num_workers > 1:
            pool.join()
            for worker_id in range(1, num_workers):
                prefix = "{}{}".format(output_prefix, worker_id)
                temp_file_path = dataset_dest_prefix(args, prefix, None)
                ds.merge_file_(temp_file_path)
                os.remove(indexed_dataset.data_file_path(temp_file_path))
                os.remove(indexed_dataset.index_file_path(temp_file_path))

        ds.finalize(dataset_dest_file(args, output_prefix, None, "idx"))

        logger.info("[alignments] {}: parsed {} alignments".format(input_file, nseq[0]))

    def make_dataset(vocab, input_prefix, output_prefix, lang, num_workers=1):
        if args.dataset_impl == "raw":
            # "raw" keeps text as-is: copy the original file to the destination.
            output_text_file = dest_path(
                output_prefix + ".{}-{}".format(args.source_lang, args.target_lang),
                lang,
            )
            shutil.copyfile(file_name(input_prefix, lang), output_text_file)
        else:
            make_binary_dataset(vocab, input_prefix, output_prefix, lang, num_workers)

    def make_all(lang, vocab):
        # valid/test prefs may be comma-separated lists; extra splits get
        # numbered suffixes ("valid1", "test1", ...).
        if args.trainpref:
            make_dataset(vocab, args.trainpref, "train", lang, num_workers=args.workers)
        if args.validpref:
            for k, validpref in enumerate(args.validpref.split(",")):
                outprefix = "valid{}".format(k) if k > 0 else "valid"
                make_dataset(
                    vocab, validpref, outprefix, lang, num_workers=args.workers
                )
        if args.testpref:
            for k, testpref in enumerate(args.testpref.split(",")):
                outprefix = "test{}".format(k) if k > 0 else "test"
                make_dataset(vocab, testpref, outprefix, lang, num_workers=args.workers)

    def make_all_alignments():
        # Alignment files are optional per split; silently skipped if absent.
        if args.trainpref and os.path.exists(args.trainpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.trainpref + "." + args.align_suffix,
                "train.align",
                num_workers=args.workers,
            )
        if args.validpref and os.path.exists(args.validpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.validpref + "." + args.align_suffix,
                "valid.align",
                num_workers=args.workers,
            )
        if args.testpref and os.path.exists(args.testpref + "." + args.align_suffix):
            make_binary_alignment_dataset(
                args.testpref + "." + args.align_suffix,
                "test.align",
                num_workers=args.workers,
            )

    # --- run the actual preprocessing -------------------------------------

    make_all(args.source_lang, src_dict)
    if target:
        make_all(args.target_lang, tgt_dict)
    if args.align_suffix:
        make_all_alignments()

    logger.info("Wrote preprocessed data to {}".format(args.destdir))

    if args.alignfile:
        # Build a word-to-word alignment dictionary: for every source word,
        # pick the target word it is most frequently aligned to.
        assert args.trainpref, "--trainpref must be set if --alignfile is specified"
        src_file_name = train_path(args.source_lang)
        tgt_file_name = train_path(args.target_lang)
        freq_map = {}
        with open(args.alignfile, "r", encoding="utf-8") as align_file:
            with open(src_file_name, "r", encoding="utf-8") as src_file:
                with open(tgt_file_name, "r", encoding="utf-8") as tgt_file:
                    for a, s, t in zip_longest(align_file, src_file, tgt_file):
                        si = src_dict.encode_line(s, add_if_not_exist=False)
                        ti = tgt_dict.encode_line(t, add_if_not_exist=False)
                        # Each alignment token is "srcpos-tgtpos".
                        ai = list(map(lambda x: tuple(x.split("-")), a.split()))
                        for sai, tai in ai:
                            srcidx = si[int(sai)]
                            tgtidx = ti[int(tai)]
                            # Unknown words are skipped; special symbols must
                            # never appear at aligned positions.
                            if srcidx != src_dict.unk() and tgtidx != tgt_dict.unk():
                                assert srcidx != src_dict.pad()
                                assert srcidx != src_dict.eos()
                                assert tgtidx != tgt_dict.pad()
                                assert tgtidx != tgt_dict.eos()

                                if srcidx not in freq_map:
                                    freq_map[srcidx] = {}
                                if tgtidx not in freq_map[srcidx]:
                                    freq_map[srcidx][tgtidx] = 1
                                else:
                                    freq_map[srcidx][tgtidx] += 1

        align_dict = {}
        for srcidx in freq_map.keys():
            # Most frequent target word for this source word.
            align_dict[srcidx] = max(freq_map[srcidx], key=freq_map[srcidx].get)

        with open(
            os.path.join(
                args.destdir,
                "alignment.{}-{}.txt".format(args.source_lang, args.target_lang),
            ),
            "w",
            encoding="utf-8",
        ) as f:
            for k, v in align_dict.items():
                print("{} {}".format(src_dict[k], tgt_dict[v]), file=f)
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def binarize(args, filename, vocab, output_prefix, lang, offset, end, append_eos=True):
    """Binarize the byte range [offset, end) of *filename* into a temp shard.

    Runs in a worker process. Writes ``<prefix>.bin`` / ``<prefix>.idx``
    and returns the statistics dict produced by ``Binarizer.binarize``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, lang, "bin"),
        impl=args.dataset_impl,
        vocab_size=len(vocab),
    )
    stats = Binarizer.binarize(
        filename,
        vocab,
        lambda tensor: builder.add_item(tensor),
        append_eos=append_eos,
        offset=offset,
        end=end,
    )
    builder.finalize(dataset_dest_file(args, output_prefix, lang, "idx"))
    return stats
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
def binarize_alignments(args, filename, parse_alignment, output_prefix, offset, end):
    """Binarize the byte range [offset, end) of an alignment file.

    Worker-process counterpart of :func:`binarize` for alignment data;
    returns the statistics dict from ``Binarizer.binarize_alignments``.
    """
    builder = indexed_dataset.make_builder(
        dataset_dest_file(args, output_prefix, None, "bin"),
        impl=args.dataset_impl,
        vocab_size=None,
    )
    stats = Binarizer.binarize_alignments(
        filename,
        parse_alignment,
        lambda tensor: builder.add_item(tensor),
        offset=offset,
        end=end,
    )
    builder.finalize(dataset_dest_file(args, output_prefix, None, "idx"))
    return stats
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def dataset_dest_prefix(args, output_prefix, lang):
    """Destination path prefix (no extension) for a binarized dataset.

    Shapes: ``destdir/prefix.src-tgt.lang`` when *lang* is given,
    ``destdir/prefix`` for source-only runs, else ``destdir/prefix.src-tgt``.
    """
    if lang is not None:
        suffix = ".{}-{}.{}".format(args.source_lang, args.target_lang, lang)
    elif args.only_source:
        suffix = ""
    else:
        suffix = ".{}-{}".format(args.source_lang, args.target_lang)
    return "{}/{}{}".format(args.destdir, output_prefix, suffix)
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
def dataset_dest_file(args, output_prefix, lang, extension):
    """Full destination path: dataset prefix plus ``.bin``/``.idx`` extension."""
    return ".".join(
        [dataset_dest_prefix(args, output_prefix, lang), extension]
    )
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def get_offsets(input_file, num_workers):
    """Return byte offsets splitting ``input_file`` into ``num_workers`` chunks."""
    return Binarizer.find_offsets(input_file, num_workers)
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
def cli_main():
    """Command-line entry point: parse preprocessing args and run :func:`main`."""
    parser = options.get_preprocessing_parser()
    args = parser.parse_args()
    main(args)


if __name__ == "__main__":
    cli_main()
|
fairseq-0.10.2/fairseq_cli/score.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the MIT license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
"""
|
| 7 |
+
BLEU scoring of generated translations against reference translations.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import argparse
|
| 11 |
+
import os
|
| 12 |
+
import sys
|
| 13 |
+
|
| 14 |
+
from fairseq.data import dictionary
|
| 15 |
+
from fairseq.scoring import bleu
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_parser():
    """Construct the argument parser for the standalone BLEU scoring tool."""
    parser = argparse.ArgumentParser(
        description="Command-line script for BLEU scoring."
    )
    parser.add_argument("-s", "--sys", default="-", help="system output")
    parser.add_argument("-r", "--ref", required=True, help="references")
    parser.add_argument(
        "-o",
        "--order",
        default=4,
        metavar="N",
        type=int,
        help="consider ngrams up to this order",
    )
    parser.add_argument(
        "--ignore-case",
        action="store_true",
        help="case-insensitive scoring",
    )
    parser.add_argument(
        "--sacrebleu",
        action="store_true",
        help="score with sacrebleu",
    )
    parser.add_argument(
        "--sentence-bleu",
        action="store_true",
        help="report sentence-level BLEUs (i.e., with +1 smoothing)",
    )
    return parser
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def cli_main():
    """Entry point: score system output against references with BLEU.

    Reads the system output from ``--sys`` (or stdin when it is "-") and
    the references from ``--ref``, then prints corpus BLEU, sacrebleu, or
    per-sentence BLEU depending on the flags.
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == "-" or os.path.exists(
        args.sys
    ), "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref)

    # Fix: the original bound this to the name ``dict``, shadowing the builtin.
    dct = dictionary.Dictionary()

    def readlines(fd):
        # Yield lines, lower-cased when case-insensitive scoring is requested.
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    # Select the scoring strategy; each branch defines score(fdsys).
    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))

    elif args.sentence_bleu:

        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dct.pad(), dct.eos(), dct.unk())
                for i, (sys_tok, ref_tok) in enumerate(
                    zip(readlines(fdsys), readlines(fdref))
                ):
                    # +1 smoothing via one_init; scorer is reset per sentence.
                    scorer.reset(one_init=True)
                    sys_tok = dct.encode_line(sys_tok)
                    ref_tok = dct.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                    print(i, scorer.result_string(args.order))

    else:

        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dct.pad(), dct.eos(), dct.unk())
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = dct.encode_line(sys_tok)
                    ref_tok = dct.encode_line(ref_tok)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == "-":
        score(sys.stdin)
    else:
        with open(args.sys, "r") as f:
            score(f)


if __name__ == "__main__":
    cli_main()
|
fairseq-0.10.2/tests/test_backtranslation_dataset.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import unittest
|
| 7 |
+
|
| 8 |
+
import tests.utils as test_utils
|
| 9 |
+
import torch
|
| 10 |
+
from fairseq.data import (
|
| 11 |
+
BacktranslationDataset,
|
| 12 |
+
LanguagePairDataset,
|
| 13 |
+
TransformEosDataset,
|
| 14 |
+
)
|
| 15 |
+
from fairseq.sequence_generator import SequenceGenerator
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class TestBacktranslationDataset(unittest.TestCase):
    """Exercises BacktranslationDataset end-to-end with a toy model and
    checks the batch it collates (src tokens and target) against hand-built
    expected tensors, with and without EOS stripping."""

    def setUp(self):
        # Toy dictionary, two word ids, sample tokens/lengths, and a tiny
        # model come from the shared test fixture.
        (
            self.tgt_dict,
            self.w1,
            self.w2,
            self.src_tokens,
            self.src_lengths,
            self.model,
        ) = test_utils.sequence_generator_setup()

        dummy_src_samples = self.src_tokens

        self.tgt_dataset = test_utils.TestDataset(data=dummy_src_samples)
        self.cuda = torch.cuda.is_available()

    def _backtranslation_dataset_helper(
        self,
        remove_eos_from_input_src,
        remove_eos_from_output_src,
    ):
        """Build the backtranslation pipeline with the given EOS handling
        and assert the first collated batch matches the expected tensors."""
        # Monolingual target-side dataset (no tgt side in the pair).
        tgt_dataset = LanguagePairDataset(
            src=self.tgt_dataset,
            src_sizes=self.tgt_dataset.sizes,
            src_dict=self.tgt_dict,
            tgt=None,
            tgt_sizes=None,
            tgt_dict=None,
        )

        generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.tgt_dict,
            max_len_a=0,
            max_len_b=200,
            beam_size=2,
            unk_penalty=0,
        )

        backtranslation_dataset = BacktranslationDataset(
            tgt_dataset=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # remove eos from the input src
                remove_eos_from_src=remove_eos_from_input_src,
            ),
            src_dict=self.tgt_dict,
            backtranslation_fn=(
                lambda sample: generator.generate([self.model], sample)
            ),
            output_collater=TransformEosDataset(
                dataset=tgt_dataset,
                eos=self.tgt_dict.eos(),
                # if we remove eos from the input src, then we need to add it
                # back to the output tgt
                append_eos_to_tgt=remove_eos_from_input_src,
                remove_eos_from_src=remove_eos_from_output_src,
            ).collater,
            cuda=self.cuda,
        )
        dataloader = torch.utils.data.DataLoader(
            backtranslation_dataset,
            batch_size=2,
            collate_fn=backtranslation_dataset.collater,
        )
        # Only the first batch is inspected.
        backtranslation_batch_result = next(iter(dataloader))

        eos, pad, w1, w2 = self.tgt_dict.eos(), self.tgt_dict.pad(), self.w1, self.w2

        # Note that we sort by src_lengths and add left padding, so actually
        # ids will look like: [1, 0]
        expected_src = torch.LongTensor([[w1, w2, w1, eos], [pad, pad, w1, eos]])
        if remove_eos_from_output_src:
            # Trailing EOS column is stripped from the expected source.
            expected_src = expected_src[:, :-1]
        expected_tgt = torch.LongTensor([[w1, w2, eos], [w1, w2, eos]])
        generated_src = backtranslation_batch_result["net_input"]["src_tokens"]
        tgt_tokens = backtranslation_batch_result["target"]

        self.assertTensorEqual(expected_src, generated_src)
        self.assertTensorEqual(expected_tgt, tgt_tokens)

    def test_backtranslation_dataset_no_eos_in_output_src(self):
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=True,
        )

    def test_backtranslation_dataset_with_eos_in_output_src(self):
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=False,
            remove_eos_from_output_src=False,
        )

    def test_backtranslation_dataset_no_eos_in_input_src(self):
        self._backtranslation_dataset_helper(
            remove_eos_from_input_src=True,
            remove_eos_from_output_src=False,
        )

    def assertTensorEqual(self, t1, t2):
        # Equal size and no element-wise mismatch.
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)


if __name__ == "__main__":
    unittest.main()
|
fairseq-0.10.2/tests/test_constraints.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import unittest
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
from fairseq.token_generation_constraints import *
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def tensorize(constraints: List[List[int]]) -> List[torch.Tensor]:
    """Convert each constraint (a list of token ids) to a 1-D tensor.

    Fix: the original annotation claimed a single ``torch.Tensor`` return,
    but the function returns a list of tensors (one per constraint).
    """
    return [torch.tensor(x) for x in constraints]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TestHelperRoutines(unittest.TestCase):
    """Checks that pack_constraints flattens a batch of per-sentence
    constraint lists into the expected padded integer tensor."""

    def setUp(self):
        # Each example pairs a batch of constraint lists with the tensor
        # pack_constraints is expected to produce for it.
        self.examples = [
            ([[]], torch.tensor([[0]])),
            ([[], []], torch.tensor([[0], [0]])),
            ([[torch.tensor([1, 2])], []], torch.tensor([[1, 1, 2, 0], [0, 0, 0, 0]])),
            (
                [
                    [
                        torch.tensor([3, 1, 2]),
                        torch.tensor([3]),
                        torch.tensor([4, 5, 6, 7]),
                    ],
                    [],
                    [torch.tensor([1, 8, 9, 10, 1, 4, 11, 12])],
                ],
                torch.tensor(
                    [
                        [3, 3, 1, 2, 0, 3, 0, 4, 5, 6, 7, 0],
                        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                        [1, 1, 8, 9, 10, 1, 4, 11, 12, 0, 0, 0],
                    ]
                ),
            ),
        ]

    def test_packing(self):
        """Ensures the list of lists of tensors gets packed correctly."""
        for constraint_batch, expected in self.examples:
            actual = pack_constraints(constraint_batch)
            assert torch.equal(actual, expected)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class TestUnorderedConstraintState(unittest.TestCase):
    """Tests for UnorderedConstraintState, which tracks progress through a set
    of constraints that may be completed in any order during generation."""

    def setUp(self):
        # Tuples of (constraint set, expected printed graph, token counts per node).
        # In the printed graph, "[tok].True" marks a node where some constraint
        # ends, and "#N" is the number of constraint sequences passing through
        # that node (this matches the token_counts() dictionaries below).
        self.examples = [
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                "([None].False#6 ([1].True#4 ([2].False#1 [3].True#1) [3].True#1 [4].True#1) ([4].False#2 ([5].True#2 ([6].False#1 [7].True#1))))",
                {1: 4, 2: 1, 3: 2, 4: 3, 5: 2, 6: 1, 7: 1},
            ),
            ([], "[None].False#0", {}),
            (tensorize([[0]]), "([None].False#1 [0].True#1)", {0: 1}),
            (
                tensorize([[100000, 1, 2, 3, 4, 5]]),
                "([None].False#1 ([100000].False#1 ([1].False#1 ([2].False#1 ([3].False#1 ([4].False#1 [5].True#1))))))",
                {100000: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                "([None].False#2 ([1].False#2 [2].True#2))",
                {1: 2, 2: 2},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                "([None].False#2 ([1].False#1 [2].True#1) ([3].False#1 [4].True#1))",
                {1: 1, 2: 1, 3: 1, 4: 1},
            ),
        ]

        # Tuples of (constraint set, token sequence to feed, expected state
        # attributes after advancing through the whole sequence).
        self.sequences = [
            (
                self.examples[0][0],
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 94],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 3, 999, 1, 4, 999],
                {"bank": 4, "num_completed": 2, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                [4, 5, 6, 8],
                {"bank": 2, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[0][0],
                # Tricky, because in last three, goes down [1->4] branch, could miss [1] and [4->5]
                # [[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]],
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                self.examples[0][0],
                [1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": True},
            ),
            (
                tensorize([[1], [2, 3]]),
                # Should not be able to get credit for entering 1 a second time
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": True},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                self.examples[4][0],
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
            (
                self.examples[5][0],
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": True},
            ),
        ]

    def test_graphs(self):
        """
        Test whether unordered graph systems are created correctly.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            c = ConstraintNode.create(constraints)
            assert (
                ConstraintNode.print_graph(c) == expected
            ), f"got {ConstraintNode.print_graph(c)}, expected {expected}"
            assert (
                c.token_counts() == gold_counts
            ), f"{c} got {c.token_counts()} wanted {gold_counts}"

    def test_next_tokens(self):
        """
        Tests that the set of next tokens is correct.
        """
        for example in self.examples:
            constraints, expected, gold_counts = example
            root = ConstraintNode.create(constraints)

            # At any point the candidate tokens are the root's children (start
            # of a new constraint) plus the current node's children.
            root_tokens = set(root.children.keys())
            for sequence in constraints:
                state = UnorderedConstraintState(root)
                for token in sequence:
                    all_tokens = root_tokens.union(state.node.children.keys())
                    assert (
                        all_tokens == state.next_tokens()
                    ), f"ALL {all_tokens} NEXT {state.next_tokens()}"
                    state = state.advance(token)

    def test_sequences(self):
        # Drive the state machine through each scripted token sequence and
        # compare the resulting state attributes against the gold values.
        for constraints, tokens, expected in self.sequences:
            state = UnorderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)

            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
class TestOrderedConstraintState(unittest.TestCase):
    """Tests for OrderedConstraintState, which requires constraints to be
    satisfied strictly in the order they were supplied."""

    def setUp(self):
        # Tuples of (constraint set, token sequence to feed, expected state
        # attributes). The expected values show that, unlike the unordered
        # variant, a token that does not continue the current constraint
        # drops progress back toward the root (e.g. [1, 2, 94] -> bank 0).
        self.sequences = [
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2],
                {"bank": 2, "num_completed": 0, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2, 94],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 3, 999, 1, 4],
                {"bank": 0, "num_completed": 0, "finished": False, "is_root": True},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2, 3, 999, 999],
                {"bank": 3, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2, 3, 77, 1, 3, 1],
                {"bank": 6, "num_completed": 2, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2, 3, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2, 3], [1, 3], [1, 4], [4, 5, 6, 7], [1], [4, 5]]),
                [1, 2, 999, 1, 2, 3, 999, 1, 3, 1, 4, 4, 5, 6, 7, 1, 4, 5, 117],
                {"bank": 14, "num_completed": 6, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1], [2, 3]]),
                [1, 1],
                {"bank": 1, "num_completed": 1, "finished": False, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [1, 2]]),
                [1, 2, 1, 2, 1],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
            (
                tensorize([[1, 2], [3, 4]]),
                [1, 2, 3, 4, 5],
                {"bank": 4, "num_completed": 2, "finished": True, "is_root": False},
            ),
        ]

    def test_sequences(self):
        # Drive the ordered state machine through each scripted sequence and
        # compare the resulting attributes against the gold values.
        for i, (constraints, tokens, expected) in enumerate(self.sequences):
            state = OrderedConstraintState.create(pack_constraints([constraints])[0])
            for token in tokens:
                state = state.advance(token)
            result = {}
            for attr in expected.keys():
                result[attr] = getattr(state, attr)
            assert (
                result == expected
            ), f"TEST({tokens}) GOT: {result} WANTED: {expected}"
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
fairseq-0.10.2/tests/test_convtbc.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import unittest
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
from fairseq.modules import ConvTBC
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestConvTBC(unittest.TestCase):
|
| 14 |
+
def test_convtbc(self):
|
| 15 |
+
# ksz, in_channels, out_channels
|
| 16 |
+
conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1)
|
| 17 |
+
# out_channels, in_channels, ksz
|
| 18 |
+
conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1)
|
| 19 |
+
|
| 20 |
+
conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2))
|
| 21 |
+
conv_tbc.bias.data.copy_(conv1d.bias.data)
|
| 22 |
+
|
| 23 |
+
input_tbc = torch.randn(7, 2, 4, requires_grad=True)
|
| 24 |
+
input1d = input_tbc.data.transpose(0, 1).transpose(1, 2)
|
| 25 |
+
input1d.requires_grad = True
|
| 26 |
+
|
| 27 |
+
output_tbc = conv_tbc(input_tbc)
|
| 28 |
+
output1d = conv1d(input1d)
|
| 29 |
+
|
| 30 |
+
self.assertAlmostEqual(
|
| 31 |
+
output_tbc.data.transpose(0, 1).transpose(1, 2), output1d.data
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
grad_tbc = torch.randn(output_tbc.size())
|
| 35 |
+
grad1d = grad_tbc.transpose(0, 1).transpose(1, 2).contiguous()
|
| 36 |
+
|
| 37 |
+
output_tbc.backward(grad_tbc)
|
| 38 |
+
output1d.backward(grad1d)
|
| 39 |
+
|
| 40 |
+
self.assertAlmostEqual(
|
| 41 |
+
conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data
|
| 42 |
+
)
|
| 43 |
+
self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data)
|
| 44 |
+
self.assertAlmostEqual(
|
| 45 |
+
input_tbc.grad.data.transpose(0, 1).transpose(1, 2), input1d.grad.data
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
def assertAlmostEqual(self, t1, t2):
|
| 49 |
+
self.assertEqual(t1.size(), t2.size(), "size mismatch")
|
| 50 |
+
self.assertLess((t1 - t2).abs().max(), 1e-4)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
fairseq-0.10.2/tests/test_inference_dropout.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import logging
|
| 7 |
+
import unittest
|
| 8 |
+
|
| 9 |
+
from fairseq.models.transformer import TransformerModel
|
| 10 |
+
from tests.test_sequence_generator import get_dummy_task_and_parser
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestInferenceDropout(unittest.TestCase):
    """Tests for the retain-dropout machinery: dropout modules should stay
    active at inference time only when explicitly requested via args."""

    def setUp(self):
        # Minimal transformer config (2 encoder / 1 decoder layers) on top of
        # the shared dummy translation task.
        self.task, self.parser = get_dummy_task_and_parser()
        TransformerModel.add_args(self.parser)
        self.args = self.parser.parse_args([])
        self.args.encoder_layers = 2
        self.args.decoder_layers = 1
        # Silence model-construction log noise for the duration of each test.
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_sets_inference_dropout_to_true(self):
        # retain_dropout=True must flip apply_during_inference everywhere.
        self.args.retain_dropout = True
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        model = self.transformer_model
        assert model.encoder.dropout_module.apply_during_inference
        assert model.decoder.dropout_module.apply_during_inference
        assert all(
            layer.dropout_module.apply_during_inference
            for layer in model.encoder.layers
        )

    def test_inference_dropout_false_by_default(self):
        # Without retain_dropout, inference dropout stays off everywhere.
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        model = self.transformer_model
        assert not model.encoder.dropout_module.apply_during_inference
        assert not model.decoder.dropout_module.apply_during_inference
        assert all(
            not layer.dropout_module.apply_during_inference
            for layer in model.encoder.layers
        )
        assert all(
            not layer.dropout_module.apply_during_inference
            for layer in model.decoder.layers
        )

    def test_applies_training_mode(self):
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        model = self.transformer_model
        # A freshly built model is in training mode, down to the submodules.
        assert model.encoder.dropout_module.training
        assert all(layer.dropout_module.training for layer in model.encoder.layers)

        # eval() must propagate to the top-level and per-layer dropout modules.
        model.eval()
        assert not model.decoder.dropout_module.training
        assert all(
            not layer.dropout_module.training for layer in model.encoder.layers
        )

    def test_retain_modules(self):
        # Restrict retention to the encoder side only; decoder must stay off.
        self.args.retain_dropout = True
        self.args.retain_dropout_modules = [
            "TransformerEncoder",
            "TransformerEncoderLayer",
        ]
        self.transformer_model = TransformerModel.build_model(self.args, self.task)
        self.transformer_model.prepare_for_inference_(self.args)
        model = self.transformer_model
        assert model.encoder.dropout_module.apply_during_inference
        assert not model.decoder.dropout_module.apply_during_inference
        assert all(
            not layer.dropout_module.apply_during_inference
            for layer in model.decoder.layers
        )
|
fairseq-0.10.2/tests/test_metrics.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import unittest
|
| 7 |
+
import uuid
|
| 8 |
+
|
| 9 |
+
from fairseq import metrics
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestMetrics(unittest.TestCase):
    """Tests for the fairseq.metrics aggregation contexts."""

    def test_nesting(self):
        # A value logged inside an inner aggregate() also accumulates in the
        # enclosing one, so the outer context averages both values.
        with metrics.aggregate() as a:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as b:
                metrics.log_scalar("loss", 2)

        self.assertEqual(a.get_smoothed_values()["loss"], 1.5)
        self.assertEqual(b.get_smoothed_values()["loss"], 2)

    def test_new_root(self):
        # new_root=True detaches the inner context: its values do not leak
        # into the enclosing aggregation.
        with metrics.aggregate() as a:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as b:
                metrics.log_scalar("loss", 2)

        self.assertEqual(a.get_smoothed_values()["loss"], 1)
        self.assertEqual(b.get_smoothed_values()["loss"], 2)

    def test_nested_new_root(self):
        # Alternating regular and new_root contexts: each new_root cuts the
        # chain, while regular nesting keeps accumulating upward.
        with metrics.aggregate() as layer1:
            metrics.log_scalar("loss", 1)
            with metrics.aggregate(new_root=True) as layer2:
                metrics.log_scalar("loss", 2)
                with metrics.aggregate() as layer3:
                    metrics.log_scalar("loss", 3)
                    with metrics.aggregate(new_root=True) as layer4:
                        metrics.log_scalar("loss", 4)
            metrics.log_scalar("loss", 1.5)

        self.assertEqual(layer4.get_smoothed_values()["loss"], 4)
        self.assertEqual(layer3.get_smoothed_values()["loss"], 3)
        self.assertEqual(layer2.get_smoothed_values()["loss"], 2.5)
        self.assertEqual(layer1.get_smoothed_values()["loss"], 1.25)

    def test_named(self):
        # A named aggregation keeps accumulating across separate context
        # entries; values logged outside it are ignored.
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)

        metrics.log_scalar("loss", 3)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 2)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 1.5)

    def test_nested_duplicate_names(self):
        # Re-entering an already-active named context must not double-count;
        # the anonymous sibling context only sees what was logged inside it.
        name = str(uuid.uuid4())
        metrics.reset_meters(name)

        with metrics.aggregate(name):
            metrics.log_scalar("loss", 1)
            with metrics.aggregate() as other:
                with metrics.aggregate(name):
                    metrics.log_scalar("loss", 2)
            metrics.log_scalar("loss", 6)

        self.assertEqual(metrics.get_smoothed_values(name)["loss"], 3)
        self.assertEqual(other.get_smoothed_values()["loss"], 2)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
fairseq-0.10.2/tests/test_sequence_scorer.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import unittest
|
| 8 |
+
|
| 9 |
+
import tests.utils as test_utils
|
| 10 |
+
import torch
|
| 11 |
+
from fairseq.sequence_scorer import SequenceScorer
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class TestSequenceScorer(unittest.TestCase):
    """Tests SequenceScorer against a dummy model with scripted step-wise
    output probabilities."""

    def test_sequence_scorer(self):
        # construct dummy dictionary
        d = test_utils.dummy_dictionary(vocab_size=2)
        self.assertEqual(d.pad(), 1)
        self.assertEqual(d.eos(), 2)
        self.assertEqual(d.unk(), 3)
        eos = d.eos()
        w1 = 4
        w2 = 5

        # construct dataloader
        data = [
            {
                "source": torch.LongTensor([w1, w2, eos]),
                "target": torch.LongTensor([w1, w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, w1, eos]),
            },
            {
                "source": torch.LongTensor([w2, eos]),
                "target": torch.LongTensor([w2, eos]),
            },
        ]
        data_itr = test_utils.dummy_dataloader(data)

        # specify expected output probabilities: one row per sentence and one
        # tensor per decoding step, with columns (pad?, eos, unk/w1, w2) as
        # commented below. The dummy model emits these verbatim.
        args = argparse.Namespace()
        unk = 0.0
        args.beam_probs = [
            # step 0:
            torch.FloatTensor(
                [
                    # eos          w1   w2
                    [0.0, unk, 0.6, 0.4],  # sentence 1
                    [0.0, unk, 0.4, 0.6],  # sentence 2
                    [0.0, unk, 0.7, 0.3],  # sentence 3
                ]
            ),
            # step 1:
            torch.FloatTensor(
                [
                    # eos          w1   w2
                    [0.0, unk, 0.2, 0.7],  # sentence 1
                    [0.0, unk, 0.8, 0.2],  # sentence 2
                    [0.7, unk, 0.1, 0.2],  # sentence 3
                ]
            ),
            # step 2:
            torch.FloatTensor(
                [
                    # eos           w1    w2
                    [0.10, unk, 0.50, 0.4],  # sentence 1
                    [0.15, unk, 0.15, 0.7],  # sentence 2
                    [0.00, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
            # step 3:
            torch.FloatTensor(
                [
                    # eos          w1    w2
                    [0.9, unk, 0.05, 0.05],  # sentence 1
                    [0.0, unk, 0.00, 0.0],  # sentence 2
                    [0.0, unk, 0.00, 0.0],  # sentence 3
                ]
            ),
        ]
        # Per-sentence gold probabilities of each target token, read off the
        # tables above; the scorer should report their logs as positional scores.
        expected_scores = [
            [0.6, 0.7, 0.5, 0.9],  # sentence 1
            [0.6, 0.8, 0.15],  # sentence 2
            [0.3, 0.7],  # sentence 3
        ]

        task = test_utils.TestTranslationTask.setup_task(args, d, d)
        model = task.build_model(args)
        scorer = SequenceScorer(task.target_dictionary)
        for sample in data_itr:
            hypos = task.inference_step(scorer, [model], sample)
            for id, hypos_id in zip(sample["id"].tolist(), hypos):
                self.assertHypoTokens(hypos_id[0], data[id]["target"])
                self.assertHypoScore(hypos_id[0], expected_scores[id])

    def assertHypoTokens(self, hypo, tokens):
        # The scored hypothesis must reproduce the target tokens exactly.
        self.assertTensorEqual(hypo["tokens"], torch.LongTensor(tokens))

    def assertHypoScore(self, hypo, pos_probs, normalized=True, lenpen=1.0):
        # Positional scores are per-token log-probabilities; the overall score
        # is their (optionally length-normalized) sum.
        pos_scores = torch.FloatTensor(pos_probs).log()
        self.assertAlmostEqual(hypo["positional_scores"], pos_scores)
        self.assertEqual(pos_scores.numel(), hypo["tokens"].numel())
        score = pos_scores.sum()
        if normalized:
            score /= pos_scores.numel() ** lenpen
        self.assertLess(abs(score - hypo["score"]), 1e-6)

    def assertAlmostEqual(self, t1, t2):
        # Element-wise near-equality for two tensors of identical shape.
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertLess((t1 - t2).abs().max(), 1e-4)

    def assertTensorEqual(self, t1, t2):
        # Exact equality for two tensors of identical shape.
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertEqual(t1.ne(t2).long().sum(), 0)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
fairseq-0.10.2/tests/test_token_block_dataset.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
| 2 |
+
#
|
| 3 |
+
# This source code is licensed under the MIT license found in the
|
| 4 |
+
# LICENSE file in the root directory of this source tree.
|
| 5 |
+
|
| 6 |
+
import unittest
|
| 7 |
+
|
| 8 |
+
import tests.utils as test_utils
|
| 9 |
+
import torch
|
| 10 |
+
from fairseq.data import TokenBlockDataset
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestTokenBlockDataset(unittest.TestCase):
    """Tests TokenBlockDataset's break modes for chunking token streams."""

    def _build_dataset(self, data, **kwargs):
        # Wrap the raw tensors in the dummy dataset and derive per-item sizes.
        sizes = [len(x) for x in data]
        underlying_ds = test_utils.TestDataset(data)
        return TokenBlockDataset(underlying_ds, sizes, **kwargs)

    def test_eos_break_mode(self):
        # break_mode="eos": one full sentence per block, regardless of length.
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
        self.assertEqual(ds[1].tolist(), [1])
        self.assertEqual(ds[2].tolist(), [8, 7, 6, 1])

        # Input order is preserved even when the lone-eos sentence comes last.
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=None, pad=0, eos=1, break_mode="eos")
        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
        self.assertEqual(ds[1].tolist(), [8, 7, 6, 1])
        self.assertEqual(ds[2].tolist(), [1])

    def test_block_break_mode(self):
        # break_mode="none": concatenate everything, then slice fixed-size
        # blocks (last block may be short), ignoring sentence boundaries.
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(data, block_size=3, pad=0, eos=1, break_mode="none")
        self.assertEqual(ds[0].tolist(), [5, 4, 3])
        self.assertEqual(ds[1].tolist(), [2, 1, 8])
        self.assertEqual(ds[2].tolist(), [7, 6, 1])
        self.assertEqual(ds[3].tolist(), [9, 1])

    def test_complete_break_mode(self):
        # break_mode="complete": pack whole sentences into a block, starting a
        # new block once block_size is reached; a sentence is never split, so
        # a single long sentence may exceed block_size on its own.
        data = [
            torch.tensor([5, 4, 3, 2, 1], dtype=torch.long),
            torch.tensor([8, 7, 6, 1], dtype=torch.long),
            torch.tensor([9, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=6, pad=0, eos=1, break_mode="complete"
        )
        self.assertEqual(ds[0].tolist(), [5, 4, 3, 2, 1])
        self.assertEqual(ds[1].tolist(), [8, 7, 6, 1, 9, 1])

        data = [
            torch.tensor([4, 3, 2, 1], dtype=torch.long),
            torch.tensor([5, 1], dtype=torch.long),
            torch.tensor([1], dtype=torch.long),
            torch.tensor([6, 1], dtype=torch.long),
        ]
        ds = self._build_dataset(
            data, block_size=3, pad=0, eos=1, break_mode="complete"
        )
        self.assertEqual(ds[0].tolist(), [4, 3, 2, 1])
        self.assertEqual(ds[1].tolist(), [5, 1, 1])
        self.assertEqual(ds[2].tolist(), [6, 1])
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
mosesdecoder/.beautify-ignore
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Files and directories that beautify.py should not clean up.
|
| 2 |
+
#
|
| 3 |
+
# This file is not as advanced as, say, .gitignore. It only supports files
|
| 4 |
+
# and directory paths relative to the project root, one per line, no globs,
|
| 5 |
+
# no quotes.
|
| 6 |
+
#
|
| 7 |
+
# Leading and trailing whitespace is stripped from filenames, but internal
|
| 8 |
+
# whitespace is preserved.
|
| 9 |
+
#
|
| 10 |
+
# Lines starting with a hash mark, such as this one, are comments. The hash
|
| 11 |
+
# mark must be the first character on the line. Blank lines are ignored.
|
| 12 |
+
#
|
| 13 |
+
# The .beautify-ignore file must be encoded in UTF-8.
|
| 14 |
+
|
| 15 |
+
boost
|
| 16 |
+
contrib
|
| 17 |
+
irstlm
|
| 18 |
+
jam-files
|
| 19 |
+
lm
|
| 20 |
+
mingw/MosesGUI/icons_rc.py
|
| 21 |
+
mingw/MosesGUI/Ui_credits.py
|
| 22 |
+
mingw/MosesGUI/Ui_mainWindow.py
|
| 23 |
+
moses/TranslationModel/UG
|
| 24 |
+
moses/server
|
| 25 |
+
moses/parameters
|
| 26 |
+
moses/thread_safe_container.h
|
| 27 |
+
phrase-extract/pcfg-common
|
| 28 |
+
phrase-extract/syntax-common
|
| 29 |
+
randlm
|
| 30 |
+
# Filename suffixes in here are language codes, so e.g. ".pl" means
|
| 31 |
+
# Polish, not Perl.
|
| 32 |
+
scripts/share/nonbreaking_prefixes
|
| 33 |
+
search
|
| 34 |
+
srilm
|
| 35 |
+
util
|
| 36 |
+
xmlrpc-c
|
| 37 |
+
.git
|
| 38 |
+
util/ug_cache_with_timeout.h
|
mosesdecoder/.travis.yml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sudo: false
|
| 2 |
+
dist: trusty
|
| 3 |
+
language: c
|
| 4 |
+
compiler: gcc
|
| 5 |
+
env:
|
| 6 |
+
matrix:
|
| 7 |
+
addons:
|
| 8 |
+
apt:
|
| 9 |
+
sources:
|
| 10 |
+
- ubuntu-toolchain-r-test
|
| 11 |
+
packages:
|
| 12 |
+
- subversion
|
| 13 |
+
- automake
|
| 14 |
+
- libtool
|
| 15 |
+
- zlib1g-dev
|
| 16 |
+
- libbz2-dev
|
| 17 |
+
- liblzma-dev
|
| 18 |
+
- libboost-all-dev
|
| 19 |
+
- libgoogle-perftools-dev
|
| 20 |
+
- libxmlrpc-c++.*-dev
|
| 21 |
+
- cmake
|
| 22 |
+
- csh
|
| 23 |
+
script:
|
| 24 |
+
- ./bjam -j4
|
mosesdecoder/azure-pipelines.yml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Starter pipeline
|
| 2 |
+
# Start with a minimal pipeline that you can customize to build and deploy your code.
|
| 3 |
+
# Add steps that build, run tests, deploy, and more:
|
| 4 |
+
# https://aka.ms/yaml
|
| 5 |
+
|
| 6 |
+
trigger:
|
| 7 |
+
- master
|
| 8 |
+
|
| 9 |
+
pool:
|
| 10 |
+
#vmImage: 'ubuntu-latest'
|
| 11 |
+
vmImage: 'ubuntu-16.04'
|
| 12 |
+
|
| 13 |
+
steps:
|
| 14 |
+
|
| 15 |
+
- script: |
|
| 16 |
+
echo Printing some environment information
|
| 17 |
+
echo HOME: $HOME
|
| 18 |
+
echo
|
| 19 |
+
echo UBUNTU VERSION:
|
| 20 |
+
cat /etc/lsb-release
|
| 21 |
+
echo
|
| 22 |
+
echo CPU INFO
|
| 23 |
+
cat /proc/cpuinfo
|
| 24 |
+
echo
|
| 25 |
+
echo MEM INFO
|
| 26 |
+
cat /proc/meminfo
|
| 27 |
+
echo
|
| 28 |
+
echo DISK INFO
|
| 29 |
+
df -h
|
| 30 |
+
echo
|
| 31 |
+
echo PWD: $PWD
|
| 32 |
+
echo
|
| 33 |
+
ls
|
| 34 |
+
displayName: 'Printing some environment information'
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
## Installation commands for Ubuntu
|
| 38 |
+
- script: |
|
| 39 |
+
sudo apt-get install \
|
| 40 |
+
g++ \
|
| 41 |
+
git \
|
| 42 |
+
subversion \
|
| 43 |
+
automake \
|
| 44 |
+
libtool \
|
| 45 |
+
zlib1g-dev \
|
| 46 |
+
libicu-dev \
|
| 47 |
+
libboost-all-dev \
|
| 48 |
+
libssl-dev \
|
| 49 |
+
libbz2-dev \
|
| 50 |
+
liblzma-dev \
|
| 51 |
+
python-dev \
|
| 52 |
+
graphviz \
|
| 53 |
+
imagemagick \
|
| 54 |
+
make \
|
| 55 |
+
cmake \
|
| 56 |
+
libgoogle-perftools-dev \
|
| 57 |
+
autoconf \
|
| 58 |
+
doxygen
|
| 59 |
+
displayName: 'Install Ubuntu packages'
|
| 60 |
+
|
| 61 |
+
- script: |
|
| 62 |
+
wget "https://sourceforge.net/projects/cmph/files/v2.0.2/cmph-2.0.2.tar.gz/download"
|
| 63 |
+
mv download cmph-2.0.2.tar.gz
|
| 64 |
+
tar xvzf cmph-2.0.2.tar.gz
|
| 65 |
+
cd cmph-2.0.2
|
| 66 |
+
./configure --prefix=$PWD
|
| 67 |
+
make
|
| 68 |
+
make install
|
| 69 |
+
cd ..
|
| 70 |
+
displayName: 'Build and Install cmph'
|
| 71 |
+
|
| 72 |
+
- script: |
|
| 73 |
+
wget "https://sourceforge.net/projects/xmlrpc-c/files/Xmlrpc-c%20Super%20Stable/1.51.06/xmlrpc-c-1.51.06.tgz/download"
|
| 74 |
+
mv download xmlrpc-c-1.51.06.tgz
|
| 75 |
+
tar xvzf xmlrpc-c-1.51.06.tgz
|
| 76 |
+
cd xmlrpc-c-1.51.06
|
| 77 |
+
./configure --prefix=$PWD
|
| 78 |
+
make
|
| 79 |
+
make install
|
| 80 |
+
sudo ldconfig
|
| 81 |
+
cd ..
|
| 82 |
+
displayName: 'Build and Install xmlrpc-c'
|
| 83 |
+
|
| 84 |
+
- script: |
|
| 85 |
+
./bjam \
|
| 86 |
+
--with-cmph=$PWD/cmph-2.0.2 \
|
| 87 |
+
--with-xmlrpc-c=$PWD/xmlrpc-c-1.51.06 \
|
| 88 |
+
-j2
|
| 89 |
+
displayName: 'Build Moses'
|
| 90 |
+
|
| 91 |
+
# - script: |
|
| 92 |
+
# ./bjam \
|
| 93 |
+
# -j2
|
| 94 |
+
# displayName: 'Build Moses'
|
| 95 |
+
|
| 96 |
+
# - task: ComponentGovernanceComponentDetection@0
|
| 97 |
+
# inputs:
|
| 98 |
+
# scanType: 'Register'
|
| 99 |
+
# verbosity: 'Verbose'
|
| 100 |
+
# alertWarningLevel: 'High'
|
mosesdecoder/biconcor/Jamfile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
|
| 2 |
+
exe phrase-lookup : Vocabulary.cpp SuffixArray.cpp phrase-lookup.cpp ;
|
mosesdecoder/biconcor/PhrasePairCollection.cpp
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "PhrasePairCollection.h"
|
| 2 |
+
|
| 3 |
+
#include <cstdlib>
|
| 4 |
+
#include <cstring>
|
| 5 |
+
#include <algorithm>
|
| 6 |
+
|
| 7 |
+
#include "Vocabulary.h"
|
| 8 |
+
#include "SuffixArray.h"
|
| 9 |
+
#include "TargetCorpus.h"
|
| 10 |
+
#include "Alignment.h"
|
| 11 |
+
#include "PhrasePair.h"
|
| 12 |
+
#include "Mismatch.h"
|
| 13 |
+
|
| 14 |
+
using namespace std;
|
| 15 |
+
|
| 16 |
+
PhrasePairCollection::PhrasePairCollection( SuffixArray *sa, TargetCorpus *tc, Alignment *a, int max_translation, int max_example )
|
| 17 |
+
:m_suffixArray(sa)
|
| 18 |
+
,m_targetCorpus(tc)
|
| 19 |
+
,m_alignment(a)
|
| 20 |
+
,m_size(0)
|
| 21 |
+
,m_max_lookup(10000) // maximum number of source occurrences sampled
|
| 22 |
+
,m_max_translation(max_translation) // max number of different distinct translations returned
|
| 23 |
+
,m_max_example(max_example) // max number of examples returned for each distinct translation
|
| 24 |
+
{}
|
| 25 |
+
|
| 26 |
+
PhrasePairCollection::~PhrasePairCollection()
|
| 27 |
+
{}
|
| 28 |
+
|
| 29 |
+
int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
|
| 30 |
+
{
|
| 31 |
+
INDEX first_match, last_match;
|
| 32 |
+
if (! m_suffixArray->FindMatches( sourceString, first_match, last_match )) {
|
| 33 |
+
return 0;
|
| 34 |
+
}
|
| 35 |
+
//cerr << "\tfirst match " << first_match << endl;
|
| 36 |
+
//cerr << "\tlast match " << last_match << endl;
|
| 37 |
+
|
| 38 |
+
INDEX found = last_match - first_match +1;
|
| 39 |
+
|
| 40 |
+
map< vector< WORD_ID >, INDEX > index;
|
| 41 |
+
int real_count = 0;
|
| 42 |
+
for( INDEX i=first_match; i<=last_match; i++ ) {
|
| 43 |
+
int position = m_suffixArray->GetPosition( i );
|
| 44 |
+
int source_start = m_suffixArray->GetWordInSentence( position );
|
| 45 |
+
int source_end = source_start + sourceString.size()-1;
|
| 46 |
+
INDEX sentence_id = m_suffixArray->GetSentence( position );
|
| 47 |
+
int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
|
| 48 |
+
int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
|
| 49 |
+
//cerr << "match " << (i-first_match)
|
| 50 |
+
//<< " in sentence " << sentence_id
|
| 51 |
+
//<< ", starting at word " << source_start
|
| 52 |
+
//<< " of " << sentence_length
|
| 53 |
+
//<< ". target sentence has " << target_length << " words.";
|
| 54 |
+
int target_start, target_end, pre_null, post_null;
|
| 55 |
+
if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
|
| 56 |
+
//cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
|
| 57 |
+
//cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
|
| 58 |
+
bool null_boundary_words = false;
|
| 59 |
+
for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
|
| 60 |
+
for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
|
| 61 |
+
vector< WORD_ID > targetString;
|
| 62 |
+
//cerr << "; ";
|
| 63 |
+
for (int target = target_start - pre; target <= target_end + post; target++) {
|
| 64 |
+
targetString.push_back( m_targetCorpus->GetWordId( sentence_id, target) );
|
| 65 |
+
//cerr << m_targetCorpus->GetWord( sentence_id, target) << " ";
|
| 66 |
+
}
|
| 67 |
+
PhrasePair *phrasePair = new PhrasePair( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, target_length, position, source_start, source_end, target_start-pre, target_end+post, pre, post, pre_null-pre, post_null-post);
|
| 68 |
+
// matchCollection.Add( sentence_id, )
|
| 69 |
+
if (index.find( targetString ) == index.end()) {
|
| 70 |
+
index[targetString] = m_collection.size();
|
| 71 |
+
vector< PhrasePair* > emptyVector;
|
| 72 |
+
m_collection.push_back( emptyVector );
|
| 73 |
+
}
|
| 74 |
+
m_collection[ index[targetString] ].push_back( phrasePair );
|
| 75 |
+
m_size++;
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
} else {
|
| 79 |
+
//cerr << "mismatch " << (i-first_match)
|
| 80 |
+
// << " in sentence " << sentence_id
|
| 81 |
+
// << ", starting at word " << source_start
|
| 82 |
+
// << " of " << sentence_length
|
| 83 |
+
// << ". target sentence has " << target_length << " words.";
|
| 84 |
+
Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
|
| 85 |
+
if (mismatch->Unaligned())
|
| 86 |
+
m_unaligned.push_back( mismatch );
|
| 87 |
+
else
|
| 88 |
+
m_mismatch.push_back( mismatch );
|
| 89 |
+
}
|
| 90 |
+
//cerr << endl;
|
| 91 |
+
|
| 92 |
+
if (found > (INDEX)m_max_lookup) {
|
| 93 |
+
i += found/m_max_lookup-1;
|
| 94 |
+
}
|
| 95 |
+
real_count++;
|
| 96 |
+
}
|
| 97 |
+
sort(m_collection.begin(), m_collection.end(), CompareBySize());
|
| 98 |
+
return real_count;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
void PhrasePairCollection::Print(bool pretty) const
|
| 102 |
+
{
|
| 103 |
+
vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
|
| 104 |
+
int i=0;
|
| 105 |
+
for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && i<m_max_translation; i++, ppWithSameTarget++ ) {
|
| 106 |
+
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
|
| 107 |
+
int count = ppWithSameTarget->size();
|
| 108 |
+
cout << "(" << count << ")" << endl;
|
| 109 |
+
vector< PhrasePair* >::const_iterator p = ppWithSameTarget->begin();
|
| 110 |
+
for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
|
| 111 |
+
if (pretty) {
|
| 112 |
+
(*p)->PrintPretty( &cout, 100 );
|
| 113 |
+
} else {
|
| 114 |
+
(*p)->Print( &cout );
|
| 115 |
+
}
|
| 116 |
+
if (ppWithSameTarget->size() > m_max_example) {
|
| 117 |
+
p += ppWithSameTarget->size()/m_max_example-1;
|
| 118 |
+
}
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
void PhrasePairCollection::PrintHTML() const
|
| 124 |
+
{
|
| 125 |
+
int pp_target = 0;
|
| 126 |
+
bool singleton = false;
|
| 127 |
+
// loop over all translations
|
| 128 |
+
vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
|
| 129 |
+
for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {
|
| 130 |
+
|
| 131 |
+
int count = ppWithSameTarget->size();
|
| 132 |
+
if (!singleton) {
|
| 133 |
+
if (count == 1) {
|
| 134 |
+
singleton = true;
|
| 135 |
+
cout << "<p class=\"pp_singleton_header\">singleton"
|
| 136 |
+
<< (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
|
| 137 |
+
<< (m_collection.end() - ppWithSameTarget)
|
| 138 |
+
<< "/" << m_size << ")</p>";
|
| 139 |
+
} else {
|
| 140 |
+
cout << "<p class=\"pp_target_header\">";
|
| 141 |
+
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
|
| 142 |
+
cout << " (" << count << "/" << m_size << ")" << endl;
|
| 143 |
+
cout << "<p><div id=\"pp_" << pp_target << "\">";
|
| 144 |
+
}
|
| 145 |
+
cout << "<table align=\"center\">";
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
vector< PhrasePair* >::const_iterator p;
|
| 149 |
+
// loop over all sentences where translation occurs
|
| 150 |
+
int pp=0;
|
| 151 |
+
int i=0;
|
| 152 |
+
for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
|
| 153 |
+
(*p)->PrintClippedHTML( &cout, 160 );
|
| 154 |
+
if (count > m_max_example) {
|
| 155 |
+
p += count/m_max_example-1;
|
| 156 |
+
pp += count/m_max_example-1;
|
| 157 |
+
}
|
| 158 |
+
}
|
| 159 |
+
if (i == 10 && pp < count) {
|
| 160 |
+
// extended table
|
| 161 |
+
cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
|
| 162 |
+
cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
|
| 163 |
+
cout << "<table align=\"center\">";
|
| 164 |
+
for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
|
| 165 |
+
(*p)->PrintClippedHTML( &cout, 160 );
|
| 166 |
+
if (count > m_max_example) {
|
| 167 |
+
p += count/m_max_example-1;
|
| 168 |
+
pp += count/m_max_example-1;
|
| 169 |
+
}
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
if (!singleton) cout << "</table></div>\n";
|
| 173 |
+
|
| 174 |
+
if (!singleton && pp_target == 9) {
|
| 175 |
+
cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
|
| 176 |
+
cout << "<p class=\"pp_target_header\">(more)</p></div>";
|
| 177 |
+
cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
|
| 178 |
+
}
|
| 179 |
+
}
|
| 180 |
+
if (singleton) cout << "</table></div>\n";
|
| 181 |
+
else if (pp_target > 9) cout << "</div>";
|
| 182 |
+
|
| 183 |
+
size_t max_mismatch = m_max_example/3;
|
| 184 |
+
// unaligned phrases
|
| 185 |
+
if (m_unaligned.size() > 0) {
|
| 186 |
+
cout << "<p class=\"pp_singleton_header\">unaligned"
|
| 187 |
+
<< " (" << (m_unaligned.size()) << ")</p>";
|
| 188 |
+
cout << "<table align=\"center\">";
|
| 189 |
+
int step_size = 1;
|
| 190 |
+
if (m_unaligned.size() > max_mismatch)
|
| 191 |
+
step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
|
| 192 |
+
for(size_t i=0; i<m_unaligned.size(); i+=step_size)
|
| 193 |
+
m_unaligned[i]->PrintClippedHTML( &cout, 160 );
|
| 194 |
+
cout << "</table>";
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
// mismatched phrases
|
| 198 |
+
if (m_mismatch.size() > 0) {
|
| 199 |
+
cout << "<p class=\"pp_singleton_header\">mismatched"
|
| 200 |
+
<< " (" << (m_mismatch.size()) << ")</p>";
|
| 201 |
+
cout << "<table align=\"center\">";
|
| 202 |
+
int step_size = 1;
|
| 203 |
+
if (m_mismatch.size() > max_mismatch)
|
| 204 |
+
step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
|
| 205 |
+
for(size_t i=0; i<m_mismatch.size(); i+=step_size)
|
| 206 |
+
m_mismatch[i]->PrintClippedHTML( &cout, 160 );
|
| 207 |
+
cout << "</table>";
|
| 208 |
+
}
|
| 209 |
+
}
|
mosesdecoder/biconcor/PhrasePairCollection.h
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include <vector>
|
| 4 |
+
#include <string>
|
| 5 |
+
|
| 6 |
+
class Alignment;
|
| 7 |
+
class PhrasePair;
|
| 8 |
+
class SuffixArray;
|
| 9 |
+
class TargetCorpus;
|
| 10 |
+
class Mismatch;
|
| 11 |
+
|
| 12 |
+
class PhrasePairCollection
|
| 13 |
+
{
|
| 14 |
+
public:
|
| 15 |
+
typedef unsigned int INDEX;
|
| 16 |
+
|
| 17 |
+
private:
|
| 18 |
+
SuffixArray *m_suffixArray;
|
| 19 |
+
TargetCorpus *m_targetCorpus;
|
| 20 |
+
Alignment *m_alignment;
|
| 21 |
+
std::vector<std::vector<PhrasePair*> > m_collection;
|
| 22 |
+
std::vector< Mismatch* > m_mismatch, m_unaligned;
|
| 23 |
+
int m_size;
|
| 24 |
+
int m_max_lookup;
|
| 25 |
+
int m_max_translation;
|
| 26 |
+
int m_max_example;
|
| 27 |
+
|
| 28 |
+
// No copying allowed.
|
| 29 |
+
PhrasePairCollection(const PhrasePairCollection&);
|
| 30 |
+
void operator=(const PhrasePairCollection&);
|
| 31 |
+
|
| 32 |
+
public:
|
| 33 |
+
PhrasePairCollection ( SuffixArray *, TargetCorpus *, Alignment *, int, int );
|
| 34 |
+
~PhrasePairCollection ();
|
| 35 |
+
|
| 36 |
+
int GetCollection( const std::vector<std::string >& sourceString );
|
| 37 |
+
void Print(bool pretty) const;
|
| 38 |
+
void PrintHTML() const;
|
| 39 |
+
};
|
| 40 |
+
|
| 41 |
+
// sorting helper
|
| 42 |
+
struct CompareBySize {
|
| 43 |
+
bool operator()(const std::vector<PhrasePair*>& a, const std::vector<PhrasePair*>& b ) const {
|
| 44 |
+
return a.size() > b.size();
|
| 45 |
+
}
|
| 46 |
+
};
|
mosesdecoder/biconcor/SuffixArray.h
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "Vocabulary.h"
|
| 4 |
+
|
| 5 |
+
class SuffixArray
|
| 6 |
+
{
|
| 7 |
+
public:
|
| 8 |
+
typedef unsigned int INDEX;
|
| 9 |
+
|
| 10 |
+
private:
|
| 11 |
+
WORD_ID *m_array;
|
| 12 |
+
INDEX *m_index;
|
| 13 |
+
INDEX *m_buffer;
|
| 14 |
+
char *m_wordInSentence;
|
| 15 |
+
INDEX *m_sentence;
|
| 16 |
+
char *m_sentenceLength;
|
| 17 |
+
WORD_ID m_endOfSentence;
|
| 18 |
+
INDEX *m_document;
|
| 19 |
+
INDEX *m_documentName;
|
| 20 |
+
char *m_documentNameBuffer;
|
| 21 |
+
size_t m_documentNameLength;
|
| 22 |
+
size_t m_documentCount;
|
| 23 |
+
bool m_useDocument;
|
| 24 |
+
Vocabulary m_vcb;
|
| 25 |
+
INDEX m_size;
|
| 26 |
+
INDEX m_sentenceCount;
|
| 27 |
+
|
| 28 |
+
// No copying allowed.
|
| 29 |
+
SuffixArray(const SuffixArray&);
|
| 30 |
+
void operator=(const SuffixArray&);
|
| 31 |
+
|
| 32 |
+
public:
|
| 33 |
+
SuffixArray();
|
| 34 |
+
~SuffixArray();
|
| 35 |
+
|
| 36 |
+
void Create(const std::string& fileName );
|
| 37 |
+
bool ProcessDocumentLine( const char* const, const size_t );
|
| 38 |
+
void Sort(INDEX start, INDEX end);
|
| 39 |
+
int CompareIndex( INDEX a, INDEX b ) const;
|
| 40 |
+
inline int CompareWord( WORD_ID a, WORD_ID b ) const;
|
| 41 |
+
int Count( const std::vector< WORD > &phrase );
|
| 42 |
+
bool MinCount( const std::vector< WORD > &phrase, INDEX min );
|
| 43 |
+
bool Exists( const std::vector< WORD > &phrase );
|
| 44 |
+
int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
|
| 45 |
+
int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
|
| 46 |
+
INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
|
| 47 |
+
INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
|
| 48 |
+
int Match( const std::vector< WORD > &phrase, INDEX index );
|
| 49 |
+
void List( INDEX start, INDEX end );
|
| 50 |
+
void PrintSentenceMatches( const std::vector< WORD > &phrase );
|
| 51 |
+
inline INDEX GetPosition( INDEX index ) const {
|
| 52 |
+
return m_index[ index ];
|
| 53 |
+
}
|
| 54 |
+
inline INDEX GetSentence( INDEX position ) const {
|
| 55 |
+
return m_sentence[position];
|
| 56 |
+
}
|
| 57 |
+
inline char GetWordInSentence( INDEX position ) const {
|
| 58 |
+
return m_wordInSentence[position];
|
| 59 |
+
}
|
| 60 |
+
inline char GetSentenceLength( INDEX sentenceId ) const {
|
| 61 |
+
return m_sentenceLength[sentenceId];
|
| 62 |
+
}
|
| 63 |
+
inline INDEX GetSize() const {
|
| 64 |
+
return m_size;
|
| 65 |
+
}
|
| 66 |
+
inline WORD GetWord( INDEX position ) const {
|
| 67 |
+
return m_vcb.GetWord( m_array[position] );
|
| 68 |
+
}
|
| 69 |
+
void UseDocument() {
|
| 70 |
+
m_useDocument = true;
|
| 71 |
+
}
|
| 72 |
+
INDEX GetDocument( INDEX sentence ) const;
|
| 73 |
+
void PrintDocumentName( INDEX document ) {
|
| 74 |
+
for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
|
| 75 |
+
std::cout << m_documentNameBuffer[ i ];
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
void Save(const std::string& fileName ) const;
|
| 79 |
+
void Load(const std::string& fileName );
|
| 80 |
+
void CheckAllocation(bool, const char *dataStructure) const;
|
| 81 |
+
bool Error( const char* message, const std::string& fileName) const;
|
| 82 |
+
};
|
mosesdecoder/biconcor/biconcor.cpp
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "SuffixArray.h"
|
| 2 |
+
#include "TargetCorpus.h"
|
| 3 |
+
#include "Alignment.h"
|
| 4 |
+
#include "PhrasePairCollection.h"
|
| 5 |
+
#include <getopt.h>
|
| 6 |
+
#include "base64.h"
|
| 7 |
+
|
| 8 |
+
using namespace std;
|
| 9 |
+
|
| 10 |
+
int main(int argc, char* argv[])
|
| 11 |
+
{
|
| 12 |
+
// handle parameters
|
| 13 |
+
string query;
|
| 14 |
+
string fileNameSuffix;
|
| 15 |
+
string fileNameSource;
|
| 16 |
+
string fileNameTarget = "";
|
| 17 |
+
string fileNameAlignment = "";
|
| 18 |
+
int loadFlag = false;
|
| 19 |
+
int saveFlag = false;
|
| 20 |
+
int createFlag = false;
|
| 21 |
+
int queryFlag = false;
|
| 22 |
+
int htmlFlag = false; // output as HTML
|
| 23 |
+
int prettyFlag = false; // output readable on screen
|
| 24 |
+
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
|
| 25 |
+
int max_translation = 20;
|
| 26 |
+
int max_example = 50;
|
| 27 |
+
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create source-corpus]\n\t[--query string]\n\t[--target target-corpus]\n\t[--alignment file]\n\t[--translations count]\n\t[--examples count]\n\t[--html]\n\t[--stdio]\n";
|
| 28 |
+
while(1) {
|
| 29 |
+
static struct option long_options[] = {
|
| 30 |
+
{"load", required_argument, 0, 'l'},
|
| 31 |
+
{"save", required_argument, 0, 's'},
|
| 32 |
+
{"create", required_argument, 0, 'c'},
|
| 33 |
+
{"query", required_argument, 0, 'q'},
|
| 34 |
+
{"target", required_argument, 0, 't'},
|
| 35 |
+
{"alignment", required_argument, 0, 'a'},
|
| 36 |
+
{"html", no_argument, 0, 'h'},
|
| 37 |
+
{"pretty", no_argument, 0, 'p'},
|
| 38 |
+
{"stdio", no_argument, 0, 'i'},
|
| 39 |
+
{"translations", required_argument, 0, 'o'},
|
| 40 |
+
{"examples", required_argument, 0, 'e'},
|
| 41 |
+
{0, 0, 0, 0}
|
| 42 |
+
};
|
| 43 |
+
int option_index = 0;
|
| 44 |
+
int c = getopt_long (argc, argv, "l:s:c:q:Q:t:a:hpio:e:", long_options, &option_index);
|
| 45 |
+
if (c == -1) break;
|
| 46 |
+
switch (c) {
|
| 47 |
+
case 'l':
|
| 48 |
+
fileNameSuffix = string(optarg);
|
| 49 |
+
loadFlag = true;
|
| 50 |
+
break;
|
| 51 |
+
case 't':
|
| 52 |
+
fileNameTarget = string(optarg);
|
| 53 |
+
break;
|
| 54 |
+
case 'a':
|
| 55 |
+
fileNameAlignment = string(optarg);
|
| 56 |
+
break;
|
| 57 |
+
case 's':
|
| 58 |
+
fileNameSuffix = string(optarg);
|
| 59 |
+
saveFlag = true;
|
| 60 |
+
break;
|
| 61 |
+
case 'c':
|
| 62 |
+
fileNameSource = string(optarg);
|
| 63 |
+
createFlag = true;
|
| 64 |
+
break;
|
| 65 |
+
case 'Q':
|
| 66 |
+
query = base64_decode(string(optarg));
|
| 67 |
+
queryFlag = true;
|
| 68 |
+
break;
|
| 69 |
+
case 'q':
|
| 70 |
+
query = string(optarg);
|
| 71 |
+
queryFlag = true;
|
| 72 |
+
break;
|
| 73 |
+
case 'o':
|
| 74 |
+
max_translation = atoi(optarg);
|
| 75 |
+
break;
|
| 76 |
+
case 'e':
|
| 77 |
+
max_example = atoi(optarg);
|
| 78 |
+
break;
|
| 79 |
+
case 'p':
|
| 80 |
+
prettyFlag = true;
|
| 81 |
+
break;
|
| 82 |
+
case 'h':
|
| 83 |
+
htmlFlag = true;
|
| 84 |
+
break;
|
| 85 |
+
case 'i':
|
| 86 |
+
stdioFlag = true;
|
| 87 |
+
break;
|
| 88 |
+
default:
|
| 89 |
+
cerr << info;
|
| 90 |
+
exit(1);
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
if (stdioFlag) {
|
| 94 |
+
queryFlag = true;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
// check if parameter settings are legal
|
| 98 |
+
if (saveFlag && !createFlag) {
|
| 99 |
+
cerr << "error: cannot save without creating\n" << info;
|
| 100 |
+
exit(1);
|
| 101 |
+
}
|
| 102 |
+
if (saveFlag && loadFlag) {
|
| 103 |
+
cerr << "error: cannot load and save at the same time\n" << info;
|
| 104 |
+
exit(1);
|
| 105 |
+
}
|
| 106 |
+
if (!loadFlag && !createFlag) {
|
| 107 |
+
cerr << "error: neither load or create - i have no info!\n" << info;
|
| 108 |
+
exit(1);
|
| 109 |
+
}
|
| 110 |
+
if (createFlag && (fileNameTarget == "" || fileNameAlignment == "")) {
|
| 111 |
+
cerr << "error: i have no target corpus or alignment\n" << info;
|
| 112 |
+
exit(1);
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
// do your thing
|
| 116 |
+
SuffixArray suffixArray;
|
| 117 |
+
TargetCorpus targetCorpus;
|
| 118 |
+
Alignment alignment;
|
| 119 |
+
if (createFlag) {
|
| 120 |
+
cerr << "will create\n";
|
| 121 |
+
cerr << "source corpus is in " << fileNameSource << endl;
|
| 122 |
+
suffixArray.Create( fileNameSource );
|
| 123 |
+
cerr << "target corpus is in " << fileNameTarget << endl;
|
| 124 |
+
targetCorpus.Create( fileNameTarget );
|
| 125 |
+
cerr << "alignment is in " << fileNameAlignment << endl;
|
| 126 |
+
alignment.Create( fileNameAlignment );
|
| 127 |
+
if (saveFlag) {
|
| 128 |
+
suffixArray.Save( fileNameSuffix );
|
| 129 |
+
targetCorpus.Save( fileNameSuffix );
|
| 130 |
+
alignment.Save( fileNameSuffix );
|
| 131 |
+
cerr << "will save in " << fileNameSuffix << endl;
|
| 132 |
+
}
|
| 133 |
+
}
|
| 134 |
+
if (loadFlag) {
|
| 135 |
+
cerr << "will load from " << fileNameSuffix << endl;
|
| 136 |
+
suffixArray.Load( fileNameSuffix );
|
| 137 |
+
targetCorpus.Load( fileNameSuffix );
|
| 138 |
+
alignment.Load( fileNameSuffix );
|
| 139 |
+
}
|
| 140 |
+
if (stdioFlag) {
|
| 141 |
+
cout << "-|||- BICONCOR START -|||-" << endl << flush;
|
| 142 |
+
while(true) {
|
| 143 |
+
string query;
|
| 144 |
+
if (getline(cin, query, '\n').eof()) {
|
| 145 |
+
return 0;
|
| 146 |
+
}
|
| 147 |
+
vector< string > queryString = alignment.Tokenize( query.c_str() );
|
| 148 |
+
PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
|
| 149 |
+
int total = ppCollection.GetCollection( queryString );
|
| 150 |
+
cout << "TOTAL: " << total << endl;
|
| 151 |
+
if (htmlFlag) {
|
| 152 |
+
ppCollection.PrintHTML();
|
| 153 |
+
} else {
|
| 154 |
+
ppCollection.Print(prettyFlag);
|
| 155 |
+
}
|
| 156 |
+
cout << "-|||- BICONCOR END -|||-" << endl << flush;
|
| 157 |
+
}
|
| 158 |
+
} else if (queryFlag) {
|
| 159 |
+
cerr << "query is " << query << endl;
|
| 160 |
+
vector< string > queryString = alignment.Tokenize( query.c_str() );
|
| 161 |
+
PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
|
| 162 |
+
ppCollection.GetCollection( queryString );
|
| 163 |
+
if (htmlFlag) {
|
| 164 |
+
ppCollection.PrintHTML();
|
| 165 |
+
} else {
|
| 166 |
+
ppCollection.Print(prettyFlag);
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
return 0;
|
| 171 |
+
}
|
mosesdecoder/chk.tmp
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
test
|
mosesdecoder/doxygen.conf
ADDED
|
@@ -0,0 +1,1781 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Doxyfile 1.7.6.1
|
| 2 |
+
|
| 3 |
+
# This file describes the settings to be used by the documentation system
|
| 4 |
+
# doxygen (www.doxygen.org) for a project.
|
| 5 |
+
#
|
| 6 |
+
# All text after a hash (#) is considered a comment and will be ignored.
|
| 7 |
+
# The format is:
|
| 8 |
+
# TAG = value [value, ...]
|
| 9 |
+
# For lists items can also be appended using:
|
| 10 |
+
# TAG += value [value, ...]
|
| 11 |
+
# Values that contain spaces should be placed between quotes (" ").
|
| 12 |
+
|
| 13 |
+
#---------------------------------------------------------------------------
|
| 14 |
+
# Project related configuration options
|
| 15 |
+
#---------------------------------------------------------------------------
|
| 16 |
+
|
| 17 |
+
# This tag specifies the encoding used for all characters in the config file
|
| 18 |
+
# that follow. The default is UTF-8 which is also the encoding used for all
|
| 19 |
+
# text before the first occurrence of this tag. Doxygen uses libiconv (or the
|
| 20 |
+
# iconv built into libc) for the transcoding. See
|
| 21 |
+
# http://www.gnu.org/software/libiconv for the list of possible encodings.
|
| 22 |
+
|
| 23 |
+
DOXYFILE_ENCODING = UTF-8
|
| 24 |
+
|
| 25 |
+
# The PROJECT_NAME tag is a single word (or sequence of words) that should
|
| 26 |
+
# identify the project. Note that if you do not use Doxywizard you need
|
| 27 |
+
# to put quotes around the project name if it contains spaces.
|
| 28 |
+
|
| 29 |
+
PROJECT_NAME = "Moses Decoder"
|
| 30 |
+
|
| 31 |
+
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
|
| 32 |
+
# This could be handy for archiving the generated documentation or
|
| 33 |
+
# if some version control system is used.
|
| 34 |
+
|
| 35 |
+
PROJECT_NUMBER =
|
| 36 |
+
|
| 37 |
+
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
| 38 |
+
# for a project that appears at the top of each page and should give viewer
|
| 39 |
+
# a quick idea about the purpose of the project. Keep the description short.
|
| 40 |
+
|
| 41 |
+
PROJECT_BRIEF =
|
| 42 |
+
|
| 43 |
+
# With the PROJECT_LOGO tag one can specify an logo or icon that is
|
| 44 |
+
# included in the documentation. The maximum height of the logo should not
|
| 45 |
+
# exceed 55 pixels and the maximum width should not exceed 200 pixels.
|
| 46 |
+
# Doxygen will copy the logo to the output directory.
|
| 47 |
+
|
| 48 |
+
PROJECT_LOGO =
|
| 49 |
+
|
| 50 |
+
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
| 51 |
+
# base path where the generated documentation will be put.
|
| 52 |
+
# If a relative path is entered, it will be relative to the location
|
| 53 |
+
# where doxygen was started. If left blank the current directory will be used.
|
| 54 |
+
|
| 55 |
+
OUTPUT_DIRECTORY = doxy
|
| 56 |
+
|
| 57 |
+
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
|
| 58 |
+
# 4096 sub-directories (in 2 levels) under the output directory of each output
|
| 59 |
+
# format and will distribute the generated files over these directories.
|
| 60 |
+
# Enabling this option can be useful when feeding doxygen a huge amount of
|
| 61 |
+
# source files, where putting all generated files in the same directory would
|
| 62 |
+
# otherwise cause performance problems for the file system.
|
| 63 |
+
|
| 64 |
+
CREATE_SUBDIRS = NO
|
| 65 |
+
|
| 66 |
+
# The OUTPUT_LANGUAGE tag is used to specify the language in which all
|
| 67 |
+
# documentation generated by doxygen is written. Doxygen will use this
|
| 68 |
+
# information to generate all constant output in the proper language.
|
| 69 |
+
# The default language is English, other supported languages are:
|
| 70 |
+
# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
|
| 71 |
+
# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
|
| 72 |
+
# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
|
| 73 |
+
# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
|
| 74 |
+
# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
|
| 75 |
+
# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
|
| 76 |
+
|
| 77 |
+
OUTPUT_LANGUAGE = English
|
| 78 |
+
|
| 79 |
+
# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
|
| 80 |
+
# include brief member descriptions after the members that are listed in
|
| 81 |
+
# the file and class documentation (similar to JavaDoc).
|
| 82 |
+
# Set to NO to disable this.
|
| 83 |
+
|
| 84 |
+
BRIEF_MEMBER_DESC = YES
|
| 85 |
+
|
| 86 |
+
# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
|
| 87 |
+
# the brief description of a member or function before the detailed description.
|
| 88 |
+
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
|
| 89 |
+
# brief descriptions will be completely suppressed.
|
| 90 |
+
|
| 91 |
+
REPEAT_BRIEF = YES
|
| 92 |
+
|
| 93 |
+
# This tag implements a quasi-intelligent brief description abbreviator
|
| 94 |
+
# that is used to form the text in various listings. Each string
|
| 95 |
+
# in this list, if found as the leading text of the brief description, will be
|
| 96 |
+
# stripped from the text and the result after processing the whole list, is
|
| 97 |
+
# used as the annotated text. Otherwise, the brief description is used as-is.
|
| 98 |
+
# If left blank, the following values are used ("$name" is automatically
|
| 99 |
+
# replaced with the name of the entity): "The $name class" "The $name widget"
|
| 100 |
+
# "The $name file" "is" "provides" "specifies" "contains"
|
| 101 |
+
# "represents" "a" "an" "the"
|
| 102 |
+
|
| 103 |
+
ABBREVIATE_BRIEF =
|
| 104 |
+
|
| 105 |
+
# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
|
| 106 |
+
# Doxygen will generate a detailed section even if there is only a brief
|
| 107 |
+
# description.
|
| 108 |
+
|
| 109 |
+
ALWAYS_DETAILED_SEC = NO
|
| 110 |
+
|
| 111 |
+
# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
|
| 112 |
+
# inherited members of a class in the documentation of that class as if those
|
| 113 |
+
# members were ordinary class members. Constructors, destructors and assignment
|
| 114 |
+
# operators of the base classes will not be shown.
|
| 115 |
+
|
| 116 |
+
INLINE_INHERITED_MEMB = NO
|
| 117 |
+
|
| 118 |
+
# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
|
| 119 |
+
# path before files name in the file list and in the header files. If set
|
| 120 |
+
# to NO the shortest path that makes the file name unique will be used.
|
| 121 |
+
|
| 122 |
+
FULL_PATH_NAMES = YES
|
| 123 |
+
|
| 124 |
+
# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
|
| 125 |
+
# can be used to strip a user-defined part of the path. Stripping is
|
| 126 |
+
# only done if one of the specified strings matches the left-hand part of
|
| 127 |
+
# the path. The tag can be used to show relative paths in the file list.
|
| 128 |
+
# If left blank the directory from which doxygen is run is used as the
|
| 129 |
+
# path to strip.
|
| 130 |
+
|
| 131 |
+
STRIP_FROM_PATH =
|
| 132 |
+
|
| 133 |
+
# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
|
| 134 |
+
# the path mentioned in the documentation of a class, which tells
|
| 135 |
+
# the reader which header file to include in order to use a class.
|
| 136 |
+
# If left blank only the name of the header file containing the class
|
| 137 |
+
# definition is used. Otherwise one should specify the include paths that
|
| 138 |
+
# are normally passed to the compiler using the -I flag.
|
| 139 |
+
|
| 140 |
+
STRIP_FROM_INC_PATH =
|
| 141 |
+
|
| 142 |
+
# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
|
| 143 |
+
# (but less readable) file names. This can be useful if your file system
|
| 144 |
+
# doesn't support long names like on DOS, Mac, or CD-ROM.
|
| 145 |
+
|
| 146 |
+
SHORT_NAMES = NO
|
| 147 |
+
|
| 148 |
+
# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
|
| 149 |
+
# will interpret the first line (until the first dot) of a JavaDoc-style
|
| 150 |
+
# comment as the brief description. If set to NO, the JavaDoc
|
| 151 |
+
# comments will behave just like regular Qt-style comments
|
| 152 |
+
# (thus requiring an explicit @brief command for a brief description.)
|
| 153 |
+
|
| 154 |
+
JAVADOC_AUTOBRIEF = NO
|
| 155 |
+
|
| 156 |
+
# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
|
| 157 |
+
# interpret the first line (until the first dot) of a Qt-style
|
| 158 |
+
# comment as the brief description. If set to NO, the comments
|
| 159 |
+
# will behave just like regular Qt-style comments (thus requiring
|
| 160 |
+
# an explicit \brief command for a brief description.)
|
| 161 |
+
|
| 162 |
+
QT_AUTOBRIEF = NO
|
| 163 |
+
|
| 164 |
+
# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
|
| 165 |
+
# treat a multi-line C++ special comment block (i.e. a block of //! or ///
|
| 166 |
+
# comments) as a brief description. This used to be the default behaviour.
|
| 167 |
+
# The new default is to treat a multi-line C++ comment block as a detailed
|
| 168 |
+
# description. Set this tag to YES if you prefer the old behaviour instead.
|
| 169 |
+
|
| 170 |
+
MULTILINE_CPP_IS_BRIEF = NO
|
| 171 |
+
|
| 172 |
+
# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
|
| 173 |
+
# member inherits the documentation from any documented member that it
|
| 174 |
+
# re-implements.
|
| 175 |
+
|
| 176 |
+
INHERIT_DOCS = YES
|
| 177 |
+
|
| 178 |
+
# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
|
| 179 |
+
# a new page for each member. If set to NO, the documentation of a member will
|
| 180 |
+
# be part of the file/class/namespace that contains it.
|
| 181 |
+
|
| 182 |
+
SEPARATE_MEMBER_PAGES = NO
|
| 183 |
+
|
| 184 |
+
# The TAB_SIZE tag can be used to set the number of spaces in a tab.
|
| 185 |
+
# Doxygen uses this value to replace tabs by spaces in code fragments.
|
| 186 |
+
|
| 187 |
+
TAB_SIZE = 8
|
| 188 |
+
|
| 189 |
+
# This tag can be used to specify a number of aliases that acts
|
| 190 |
+
# as commands in the documentation. An alias has the form "name=value".
|
| 191 |
+
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
|
| 192 |
+
# put the command \sideeffect (or @sideeffect) in the documentation, which
|
| 193 |
+
# will result in a user-defined paragraph with heading "Side Effects:".
|
| 194 |
+
# You can put \n's in the value part of an alias to insert newlines.
|
| 195 |
+
|
| 196 |
+
ALIASES =
|
| 197 |
+
|
| 198 |
+
# This tag can be used to specify a number of word-keyword mappings (TCL only).
|
| 199 |
+
# A mapping has the form "name=value". For example adding
|
| 200 |
+
# "class=itcl::class" will allow you to use the command class in the
|
| 201 |
+
# itcl::class meaning.
|
| 202 |
+
|
| 203 |
+
TCL_SUBST =
|
| 204 |
+
|
| 205 |
+
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
|
| 206 |
+
# sources only. Doxygen will then generate output that is more tailored for C.
|
| 207 |
+
# For instance, some of the names that are used will be different. The list
|
| 208 |
+
# of all members will be omitted, etc.
|
| 209 |
+
|
| 210 |
+
OPTIMIZE_OUTPUT_FOR_C = NO
|
| 211 |
+
|
| 212 |
+
# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
|
| 213 |
+
# sources only. Doxygen will then generate output that is more tailored for
|
| 214 |
+
# Java. For instance, namespaces will be presented as packages, qualified
|
| 215 |
+
# scopes will look different, etc.
|
| 216 |
+
|
| 217 |
+
OPTIMIZE_OUTPUT_JAVA = NO
|
| 218 |
+
|
| 219 |
+
# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
|
| 220 |
+
# sources only. Doxygen will then generate output that is more tailored for
|
| 221 |
+
# Fortran.
|
| 222 |
+
|
| 223 |
+
OPTIMIZE_FOR_FORTRAN = NO
|
| 224 |
+
|
| 225 |
+
# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
|
| 226 |
+
# sources. Doxygen will then generate output that is tailored for
|
| 227 |
+
# VHDL.
|
| 228 |
+
|
| 229 |
+
OPTIMIZE_OUTPUT_VHDL = NO
|
| 230 |
+
|
| 231 |
+
# Doxygen selects the parser to use depending on the extension of the files it
|
| 232 |
+
# parses. With this tag you can assign which parser to use for a given extension.
|
| 233 |
+
# Doxygen has a built-in mapping, but you can override or extend it using this
|
| 234 |
+
# tag. The format is ext=language, where ext is a file extension, and language
|
| 235 |
+
# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
|
| 236 |
+
# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
|
| 237 |
+
# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
|
| 238 |
+
# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
|
| 239 |
+
# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
|
| 240 |
+
|
| 241 |
+
EXTENSION_MAPPING =
|
| 242 |
+
|
| 243 |
+
# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
|
| 244 |
+
# to include (a tag file for) the STL sources as input, then you should
|
| 245 |
+
# set this tag to YES in order to let doxygen match functions declarations and
|
| 246 |
+
# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
|
| 247 |
+
# func(std::string) {}). This also makes the inheritance and collaboration
|
| 248 |
+
# diagrams that involve STL classes more complete and accurate.
|
| 249 |
+
|
| 250 |
+
BUILTIN_STL_SUPPORT = NO
|
| 251 |
+
|
| 252 |
+
# If you use Microsoft's C++/CLI language, you should set this option to YES to
|
| 253 |
+
# enable parsing support.
|
| 254 |
+
|
| 255 |
+
CPP_CLI_SUPPORT = NO
|
| 256 |
+
|
| 257 |
+
# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
|
| 258 |
+
# Doxygen will parse them like normal C++ but will assume all classes use public
|
| 259 |
+
# instead of private inheritance when no explicit protection keyword is present.
|
| 260 |
+
|
| 261 |
+
SIP_SUPPORT = NO
|
| 262 |
+
|
| 263 |
+
# For Microsoft's IDL there are propget and propput attributes to indicate getter
|
| 264 |
+
# and setter methods for a property. Setting this option to YES (the default)
|
| 265 |
+
# will make doxygen replace the get and set methods by a property in the
|
| 266 |
+
# documentation. This will only work if the methods are indeed getting or
|
| 267 |
+
# setting a simple type. If this is not the case, or you want to show the
|
| 268 |
+
# methods anyway, you should set this option to NO.
|
| 269 |
+
|
| 270 |
+
IDL_PROPERTY_SUPPORT = YES
|
| 271 |
+
|
| 272 |
+
# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
|
| 273 |
+
# tag is set to YES, then doxygen will reuse the documentation of the first
|
| 274 |
+
# member in the group (if any) for the other members of the group. By default
|
| 275 |
+
# all members of a group must be documented explicitly.
|
| 276 |
+
|
| 277 |
+
DISTRIBUTE_GROUP_DOC = NO
|
| 278 |
+
|
| 279 |
+
# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
|
| 280 |
+
# the same type (for instance a group of public functions) to be put as a
|
| 281 |
+
# subgroup of that type (e.g. under the Public Functions section). Set it to
|
| 282 |
+
# NO to prevent subgrouping. Alternatively, this can be done per class using
|
| 283 |
+
# the \nosubgrouping command.
|
| 284 |
+
|
| 285 |
+
SUBGROUPING = YES
|
| 286 |
+
|
| 287 |
+
# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
|
| 288 |
+
# unions are shown inside the group in which they are included (e.g. using
|
| 289 |
+
# @ingroup) instead of on a separate page (for HTML and Man pages) or
|
| 290 |
+
# section (for LaTeX and RTF).
|
| 291 |
+
|
| 292 |
+
INLINE_GROUPED_CLASSES = NO
|
| 293 |
+
|
| 294 |
+
# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
|
| 295 |
+
# unions with only public data fields will be shown inline in the documentation
|
| 296 |
+
# of the scope in which they are defined (i.e. file, namespace, or group
|
| 297 |
+
# documentation), provided this scope is documented. If set to NO (the default),
|
| 298 |
+
# structs, classes, and unions are shown on a separate page (for HTML and Man
|
| 299 |
+
# pages) or section (for LaTeX and RTF).
|
| 300 |
+
|
| 301 |
+
INLINE_SIMPLE_STRUCTS = NO
|
| 302 |
+
|
| 303 |
+
# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
|
| 304 |
+
# is documented as struct, union, or enum with the name of the typedef. So
|
| 305 |
+
# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
|
| 306 |
+
# with name TypeT. When disabled the typedef will appear as a member of a file,
|
| 307 |
+
# namespace, or class. And the struct will be named TypeS. This can typically
|
| 308 |
+
# be useful for C code in case the coding convention dictates that all compound
|
| 309 |
+
# types are typedef'ed and only the typedef is referenced, never the tag name.
|
| 310 |
+
|
| 311 |
+
TYPEDEF_HIDES_STRUCT = NO
|
| 312 |
+
|
| 313 |
+
# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
|
| 314 |
+
# determine which symbols to keep in memory and which to flush to disk.
|
| 315 |
+
# When the cache is full, less often used symbols will be written to disk.
|
| 316 |
+
# For small to medium size projects (<1000 input files) the default value is
|
| 317 |
+
# probably good enough. For larger projects a too small cache size can cause
|
| 318 |
+
# doxygen to be busy swapping symbols to and from disk most of the time
|
| 319 |
+
# causing a significant performance penalty.
|
| 320 |
+
# If the system has enough physical memory increasing the cache will improve the
|
| 321 |
+
# performance by keeping more symbols in memory. Note that the value works on
|
| 322 |
+
# a logarithmic scale so increasing the size by one will roughly double the
|
| 323 |
+
# memory usage. The cache size is given by this formula:
|
| 324 |
+
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
|
| 325 |
+
# corresponding to a cache size of 2^16 = 65536 symbols.
|
| 326 |
+
|
| 327 |
+
SYMBOL_CACHE_SIZE = 0
|
| 328 |
+
|
| 329 |
+
# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
|
| 330 |
+
# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
|
| 331 |
+
# their name and scope. Since this can be an expensive process and often the
|
| 332 |
+
# same symbol appear multiple times in the code, doxygen keeps a cache of
|
| 333 |
+
# pre-resolved symbols. If the cache is too small doxygen will become slower.
|
| 334 |
+
# If the cache is too large, memory is wasted. The cache size is given by this
|
| 335 |
+
# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
|
| 336 |
+
# corresponding to a cache size of 2^16 = 65536 symbols.
|
| 337 |
+
|
| 338 |
+
LOOKUP_CACHE_SIZE = 0
|
| 339 |
+
|
| 340 |
+
#---------------------------------------------------------------------------
|
| 341 |
+
# Build related configuration options
|
| 342 |
+
#---------------------------------------------------------------------------
|
| 343 |
+
|
| 344 |
+
# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
|
| 345 |
+
# documentation are documented, even if no documentation was available.
|
| 346 |
+
# Private class members and static file members will be hidden unless
|
| 347 |
+
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
|
| 348 |
+
|
| 349 |
+
EXTRACT_ALL = YES
|
| 350 |
+
|
| 351 |
+
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
|
| 352 |
+
# will be included in the documentation.
|
| 353 |
+
|
| 354 |
+
EXTRACT_PRIVATE = YES
|
| 355 |
+
|
| 356 |
+
# If the EXTRACT_STATIC tag is set to YES all static members of a file
|
| 357 |
+
# will be included in the documentation.
|
| 358 |
+
|
| 359 |
+
EXTRACT_STATIC = YES
|
| 360 |
+
|
| 361 |
+
# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
|
| 362 |
+
# defined locally in source files will be included in the documentation.
|
| 363 |
+
# If set to NO only classes defined in header files are included.
|
| 364 |
+
|
| 365 |
+
EXTRACT_LOCAL_CLASSES = NO
|
| 366 |
+
|
| 367 |
+
# This flag is only useful for Objective-C code. When set to YES local
|
| 368 |
+
# methods, which are defined in the implementation section but not in
|
| 369 |
+
# the interface are included in the documentation.
|
| 370 |
+
# If set to NO (the default) only methods in the interface are included.
|
| 371 |
+
|
| 372 |
+
EXTRACT_LOCAL_METHODS = NO
|
| 373 |
+
|
| 374 |
+
# If this flag is set to YES, the members of anonymous namespaces will be
|
| 375 |
+
# extracted and appear in the documentation as a namespace called
|
| 376 |
+
# 'anonymous_namespace{file}', where file will be replaced with the base
|
| 377 |
+
# name of the file that contains the anonymous namespace. By default
|
| 378 |
+
# anonymous namespaces are hidden.
|
| 379 |
+
|
| 380 |
+
EXTRACT_ANON_NSPACES = NO
|
| 381 |
+
|
| 382 |
+
# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
|
| 383 |
+
# undocumented members of documented classes, files or namespaces.
|
| 384 |
+
# If set to NO (the default) these members will be included in the
|
| 385 |
+
# various overviews, but no documentation section is generated.
|
| 386 |
+
# This option has no effect if EXTRACT_ALL is enabled.
|
| 387 |
+
|
| 388 |
+
HIDE_UNDOC_MEMBERS = NO
|
| 389 |
+
|
| 390 |
+
# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
|
| 391 |
+
# undocumented classes that are normally visible in the class hierarchy.
|
| 392 |
+
# If set to NO (the default) these classes will be included in the various
|
| 393 |
+
# overviews. This option has no effect if EXTRACT_ALL is enabled.
|
| 394 |
+
|
| 395 |
+
HIDE_UNDOC_CLASSES = NO
|
| 396 |
+
|
| 397 |
+
# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
|
| 398 |
+
# friend (class|struct|union) declarations.
|
| 399 |
+
# If set to NO (the default) these declarations will be included in the
|
| 400 |
+
# documentation.
|
| 401 |
+
|
| 402 |
+
HIDE_FRIEND_COMPOUNDS = NO
|
| 403 |
+
|
| 404 |
+
# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
|
| 405 |
+
# documentation blocks found inside the body of a function.
|
| 406 |
+
# If set to NO (the default) these blocks will be appended to the
|
| 407 |
+
# function's detailed documentation block.
|
| 408 |
+
|
| 409 |
+
HIDE_IN_BODY_DOCS = NO
|
| 410 |
+
|
| 411 |
+
# The INTERNAL_DOCS tag determines if documentation
|
| 412 |
+
# that is typed after a \internal command is included. If the tag is set
|
| 413 |
+
# to NO (the default) then the documentation will be excluded.
|
| 414 |
+
# Set it to YES to include the internal documentation.
|
| 415 |
+
|
| 416 |
+
INTERNAL_DOCS = NO
|
| 417 |
+
|
| 418 |
+
# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
|
| 419 |
+
# file names in lower-case letters. If set to YES upper-case letters are also
|
| 420 |
+
# allowed. This is useful if you have classes or files whose names only differ
|
| 421 |
+
# in case and if your file system supports case sensitive file names. Windows
|
| 422 |
+
# and Mac users are advised to set this option to NO.
|
| 423 |
+
|
| 424 |
+
CASE_SENSE_NAMES = YES
|
| 425 |
+
|
| 426 |
+
# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
|
| 427 |
+
# will show members with their full class and namespace scopes in the
|
| 428 |
+
# documentation. If set to YES the scope will be hidden.
|
| 429 |
+
|
| 430 |
+
HIDE_SCOPE_NAMES = NO
|
| 431 |
+
|
| 432 |
+
# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
|
| 433 |
+
# will put a list of the files that are included by a file in the documentation
|
| 434 |
+
# of that file.
|
| 435 |
+
|
| 436 |
+
SHOW_INCLUDE_FILES = YES
|
| 437 |
+
|
| 438 |
+
# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
|
| 439 |
+
# will list include files with double quotes in the documentation
|
| 440 |
+
# rather than with sharp brackets.
|
| 441 |
+
|
| 442 |
+
FORCE_LOCAL_INCLUDES = NO
|
| 443 |
+
|
| 444 |
+
# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
|
| 445 |
+
# is inserted in the documentation for inline members.
|
| 446 |
+
|
| 447 |
+
INLINE_INFO = YES
|
| 448 |
+
|
| 449 |
+
# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
|
| 450 |
+
# will sort the (detailed) documentation of file and class members
|
| 451 |
+
# alphabetically by member name. If set to NO the members will appear in
|
| 452 |
+
# declaration order.
|
| 453 |
+
|
| 454 |
+
SORT_MEMBER_DOCS = YES
|
| 455 |
+
|
| 456 |
+
# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
|
| 457 |
+
# brief documentation of file, namespace and class members alphabetically
|
| 458 |
+
# by member name. If set to NO (the default) the members will appear in
|
| 459 |
+
# declaration order.
|
| 460 |
+
|
| 461 |
+
SORT_BRIEF_DOCS = NO
|
| 462 |
+
|
| 463 |
+
# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
|
| 464 |
+
# will sort the (brief and detailed) documentation of class members so that
|
| 465 |
+
# constructors and destructors are listed first. If set to NO (the default)
|
| 466 |
+
# the constructors will appear in the respective orders defined by
|
| 467 |
+
# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
|
| 468 |
+
# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
|
| 469 |
+
# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
|
| 470 |
+
|
| 471 |
+
SORT_MEMBERS_CTORS_1ST = NO
|
| 472 |
+
|
| 473 |
+
# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
|
| 474 |
+
# hierarchy of group names into alphabetical order. If set to NO (the default)
|
| 475 |
+
# the group names will appear in their defined order.
|
| 476 |
+
|
| 477 |
+
SORT_GROUP_NAMES = NO
|
| 478 |
+
|
| 479 |
+
# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
|
| 480 |
+
# sorted by fully-qualified names, including namespaces. If set to
|
| 481 |
+
# NO (the default), the class list will be sorted only by class name,
|
| 482 |
+
# not including the namespace part.
|
| 483 |
+
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
|
| 484 |
+
# Note: This option applies only to the class list, not to the
|
| 485 |
+
# alphabetical list.
|
| 486 |
+
|
| 487 |
+
SORT_BY_SCOPE_NAME = NO
|
| 488 |
+
|
| 489 |
+
# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
|
| 490 |
+
# do proper type resolution of all parameters of a function it will reject a
|
| 491 |
+
# match between the prototype and the implementation of a member function even
|
| 492 |
+
# if there is only one candidate or it is obvious which candidate to choose
|
| 493 |
+
# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
|
| 494 |
+
# will still accept a match between prototype and implementation in such cases.
|
| 495 |
+
|
| 496 |
+
STRICT_PROTO_MATCHING = NO
|
| 497 |
+
|
| 498 |
+
# The GENERATE_TODOLIST tag can be used to enable (YES) or
|
| 499 |
+
# disable (NO) the todo list. This list is created by putting \todo
|
| 500 |
+
# commands in the documentation.
|
| 501 |
+
|
| 502 |
+
GENERATE_TODOLIST = YES
|
| 503 |
+
|
| 504 |
+
# The GENERATE_TESTLIST tag can be used to enable (YES) or
|
| 505 |
+
# disable (NO) the test list. This list is created by putting \test
|
| 506 |
+
# commands in the documentation.
|
| 507 |
+
|
| 508 |
+
GENERATE_TESTLIST = YES
|
| 509 |
+
|
| 510 |
+
# The GENERATE_BUGLIST tag can be used to enable (YES) or
|
| 511 |
+
# disable (NO) the bug list. This list is created by putting \bug
|
| 512 |
+
# commands in the documentation.
|
| 513 |
+
|
| 514 |
+
GENERATE_BUGLIST = YES
|
| 515 |
+
|
| 516 |
+
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
|
| 517 |
+
# disable (NO) the deprecated list. This list is created by putting
|
| 518 |
+
# \deprecated commands in the documentation.
|
| 519 |
+
|
| 520 |
+
GENERATE_DEPRECATEDLIST = YES
|
| 521 |
+
|
| 522 |
+
# The ENABLED_SECTIONS tag can be used to enable conditional
|
| 523 |
+
# documentation sections, marked by \if sectionname ... \endif.
|
| 524 |
+
|
| 525 |
+
ENABLED_SECTIONS =
|
| 526 |
+
|
| 527 |
+
# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
|
| 528 |
+
# the initial value of a variable or macro consists of for it to appear in
|
| 529 |
+
# the documentation. If the initializer consists of more lines than specified
|
| 530 |
+
# here it will be hidden. Use a value of 0 to hide initializers completely.
|
| 531 |
+
# The appearance of the initializer of individual variables and macros in the
|
| 532 |
+
# documentation can be controlled using \showinitializer or \hideinitializer
|
| 533 |
+
# command in the documentation regardless of this setting.
|
| 534 |
+
|
| 535 |
+
MAX_INITIALIZER_LINES = 30
|
| 536 |
+
|
| 537 |
+
# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
|
| 538 |
+
# at the bottom of the documentation of classes and structs. If set to YES the
|
| 539 |
+
# list will mention the files that were used to generate the documentation.
|
| 540 |
+
|
| 541 |
+
SHOW_USED_FILES = YES
|
| 542 |
+
|
| 543 |
+
# If the sources in your project are distributed over multiple directories
|
| 544 |
+
# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
|
| 545 |
+
# in the documentation. The default is NO.
|
| 546 |
+
|
| 547 |
+
SHOW_DIRECTORIES = NO
|
| 548 |
+
|
| 549 |
+
# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
|
| 550 |
+
# This will remove the Files entry from the Quick Index and from the
|
| 551 |
+
# Folder Tree View (if specified). The default is YES.
|
| 552 |
+
|
| 553 |
+
SHOW_FILES = YES
|
| 554 |
+
|
| 555 |
+
# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
|
| 556 |
+
# Namespaces page.
|
| 557 |
+
# This will remove the Namespaces entry from the Quick Index
|
| 558 |
+
# and from the Folder Tree View (if specified). The default is YES.
|
| 559 |
+
|
| 560 |
+
SHOW_NAMESPACES = YES
|
| 561 |
+
|
| 562 |
+
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
|
| 563 |
+
# doxygen should invoke to get the current version for each file (typically from
|
| 564 |
+
# the version control system). Doxygen will invoke the program by executing (via
|
| 565 |
+
# popen()) the command <command> <input-file>, where <command> is the value of
|
| 566 |
+
# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
|
| 567 |
+
# provided by doxygen. Whatever the program writes to standard output
|
| 568 |
+
# is used as the file version. See the manual for examples.
|
| 569 |
+
|
| 570 |
+
FILE_VERSION_FILTER =
|
| 571 |
+
|
| 572 |
+
# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
|
| 573 |
+
# by doxygen. The layout file controls the global structure of the generated
|
| 574 |
+
# output files in an output format independent way. To create the layout file
|
| 575 |
+
# that represents doxygen's defaults, run doxygen with the -l option.
|
| 576 |
+
# You can optionally specify a file name after the option, if omitted
|
| 577 |
+
# DoxygenLayout.xml will be used as the name of the layout file.
|
| 578 |
+
|
| 579 |
+
LAYOUT_FILE =
|
| 580 |
+
|
| 581 |
+
# The CITE_BIB_FILES tag can be used to specify one or more bib files
|
| 582 |
+
# containing the references data. This must be a list of .bib files. The
|
| 583 |
+
# .bib extension is automatically appended if omitted. Using this command
|
| 584 |
+
# requires the bibtex tool to be installed. See also
|
| 585 |
+
# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
|
| 586 |
+
# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
|
| 587 |
+
# feature you need bibtex and perl available in the search path.
|
| 588 |
+
|
| 589 |
+
CITE_BIB_FILES =
|
| 590 |
+
|
| 591 |
+
#---------------------------------------------------------------------------
|
| 592 |
+
# configuration options related to warning and progress messages
|
| 593 |
+
#---------------------------------------------------------------------------
|
| 594 |
+
|
| 595 |
+
# The QUIET tag can be used to turn on/off the messages that are generated
|
| 596 |
+
# by doxygen. Possible values are YES and NO. If left blank NO is used.
|
| 597 |
+
|
| 598 |
+
QUIET = NO
|
| 599 |
+
|
| 600 |
+
# The WARNINGS tag can be used to turn on/off the warning messages that are
|
| 601 |
+
# generated by doxygen. Possible values are YES and NO. If left blank
|
| 602 |
+
# NO is used.
|
| 603 |
+
|
| 604 |
+
WARNINGS = YES
|
| 605 |
+
|
| 606 |
+
# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
|
| 607 |
+
# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
|
| 608 |
+
# automatically be disabled.
|
| 609 |
+
|
| 610 |
+
WARN_IF_UNDOCUMENTED = YES
|
| 611 |
+
|
| 612 |
+
# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
|
| 613 |
+
# potential errors in the documentation, such as not documenting some
|
| 614 |
+
# parameters in a documented function, or documenting parameters that
|
| 615 |
+
# don't exist or using markup commands wrongly.
|
| 616 |
+
|
| 617 |
+
WARN_IF_DOC_ERROR = YES
|
| 618 |
+
|
| 619 |
+
# The WARN_NO_PARAMDOC option can be enabled to get warnings for
|
| 620 |
+
# functions that are documented, but have no documentation for their parameters
|
| 621 |
+
# or return value. If set to NO (the default) doxygen will only warn about
|
| 622 |
+
# wrong or incomplete parameter documentation, but not about the absence of
|
| 623 |
+
# documentation.
|
| 624 |
+
|
| 625 |
+
WARN_NO_PARAMDOC = NO
|
| 626 |
+
|
| 627 |
+
# The WARN_FORMAT tag determines the format of the warning messages that
|
| 628 |
+
# doxygen can produce. The string should contain the $file, $line, and $text
|
| 629 |
+
# tags, which will be replaced by the file and line number from which the
|
| 630 |
+
# warning originated and the warning text. Optionally the format may contain
|
| 631 |
+
# $version, which will be replaced by the version of the file (if it could
|
| 632 |
+
# be obtained via FILE_VERSION_FILTER)
|
| 633 |
+
|
| 634 |
+
WARN_FORMAT = "$file:$line: $text"
|
| 635 |
+
|
| 636 |
+
# The WARN_LOGFILE tag can be used to specify a file to which warning
|
| 637 |
+
# and error messages should be written. If left blank the output is written
|
| 638 |
+
# to stderr.
|
| 639 |
+
|
| 640 |
+
WARN_LOGFILE =
|
| 641 |
+
|
| 642 |
+
#---------------------------------------------------------------------------
|
| 643 |
+
# configuration options related to the input files
|
| 644 |
+
#---------------------------------------------------------------------------
|
| 645 |
+
|
| 646 |
+
# The INPUT tag can be used to specify the files and/or directories that contain
|
| 647 |
+
# documented source files. You may enter file names like "myfile.cpp" or
|
| 648 |
+
# directories like "/usr/src/myproject". Separate the files or directories
|
| 649 |
+
# with spaces.
|
| 650 |
+
|
| 651 |
+
INPUT = moses
|
| 652 |
+
|
| 653 |
+
# This tag can be used to specify the character encoding of the source files
|
| 654 |
+
# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
|
| 655 |
+
# also the default input encoding. Doxygen uses libiconv (or the iconv built
|
| 656 |
+
# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
|
| 657 |
+
# the list of possible encodings.
|
| 658 |
+
|
| 659 |
+
INPUT_ENCODING = UTF-8
|
| 660 |
+
|
| 661 |
+
# If the value of the INPUT tag contains directories, you can use the
|
| 662 |
+
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
|
| 663 |
+
# and *.h) to filter out the source-files in the directories. If left
|
| 664 |
+
# blank the following patterns are tested:
|
| 665 |
+
# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
|
| 666 |
+
# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
|
| 667 |
+
# *.f90 *.f *.for *.vhd *.vhdl
|
| 668 |
+
|
| 669 |
+
FILE_PATTERNS =
|
| 670 |
+
|
| 671 |
+
# The RECURSIVE tag can be used to specify whether or not subdirectories
|
| 672 |
+
# should be searched for input files as well. Possible values are YES and NO.
|
| 673 |
+
# If left blank NO is used.
|
| 674 |
+
|
| 675 |
+
RECURSIVE = YES
|
| 676 |
+
|
| 677 |
+
# The EXCLUDE tag can be used to specify files and/or directories that should be
|
| 678 |
+
# excluded from the INPUT source files. This way you can easily exclude a
|
| 679 |
+
# subdirectory from a directory tree whose root is specified with the INPUT tag.
|
| 680 |
+
# Note that relative paths are relative to the directory from which doxygen is
|
| 681 |
+
# run.
|
| 682 |
+
|
| 683 |
+
EXCLUDE = opt regtest doxy
|
| 684 |
+
|
| 685 |
+
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
| 686 |
+
# directories that are symbolic links (a Unix file system feature) are excluded
|
| 687 |
+
# from the input.
|
| 688 |
+
|
| 689 |
+
EXCLUDE_SYMLINKS = NO
|
| 690 |
+
|
| 691 |
+
# If the value of the INPUT tag contains directories, you can use the
|
| 692 |
+
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
|
| 693 |
+
# certain files from those directories. Note that the wildcards are matched
|
| 694 |
+
# against the file with absolute path, so to exclude all test directories
|
| 695 |
+
# for example use the pattern */test/*
|
| 696 |
+
|
| 697 |
+
EXCLUDE_PATTERNS = opt/* regtest/* doxy/*
|
| 698 |
+
|
| 699 |
+
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
|
| 700 |
+
# (namespaces, classes, functions, etc.) that should be excluded from the
|
| 701 |
+
# output. The symbol name can be a fully qualified name, a word, or if the
|
| 702 |
+
# wildcard * is used, a substring. Examples: ANamespace, AClass,
|
| 703 |
+
# AClass::ANamespace, ANamespace::*Test
|
| 704 |
+
|
| 705 |
+
EXCLUDE_SYMBOLS =
|
| 706 |
+
|
| 707 |
+
# The EXAMPLE_PATH tag can be used to specify one or more files or
|
| 708 |
+
# directories that contain example code fragments that are included (see
|
| 709 |
+
# the \include command).
|
| 710 |
+
|
| 711 |
+
EXAMPLE_PATH =
|
| 712 |
+
|
| 713 |
+
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
|
| 714 |
+
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
|
| 715 |
+
# and *.h) to filter out the source-files in the directories. If left
|
| 716 |
+
# blank all files are included.
|
| 717 |
+
|
| 718 |
+
EXAMPLE_PATTERNS =
|
| 719 |
+
|
| 720 |
+
# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
|
| 721 |
+
# searched for input files to be used with the \include or \dontinclude
|
| 722 |
+
# commands irrespective of the value of the RECURSIVE tag.
|
| 723 |
+
# Possible values are YES and NO. If left blank NO is used.
|
| 724 |
+
|
| 725 |
+
EXAMPLE_RECURSIVE = NO
|
| 726 |
+
|
| 727 |
+
# The IMAGE_PATH tag can be used to specify one or more files or
|
| 728 |
+
# directories that contain image that are included in the documentation (see
|
| 729 |
+
# the \image command).
|
| 730 |
+
|
| 731 |
+
IMAGE_PATH =
|
| 732 |
+
|
| 733 |
+
# The INPUT_FILTER tag can be used to specify a program that doxygen should
|
| 734 |
+
# invoke to filter for each input file. Doxygen will invoke the filter program
|
| 735 |
+
# by executing (via popen()) the command <filter> <input-file>, where <filter>
|
| 736 |
+
# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
|
| 737 |
+
# input file. Doxygen will then use the output that the filter program writes
|
| 738 |
+
# to standard output.
|
| 739 |
+
# If FILTER_PATTERNS is specified, this tag will be
|
| 740 |
+
# ignored.
|
| 741 |
+
|
| 742 |
+
INPUT_FILTER =
|
| 743 |
+
|
| 744 |
+
# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
|
| 745 |
+
# basis.
|
| 746 |
+
# Doxygen will compare the file name with each pattern and apply the
|
| 747 |
+
# filter if there is a match.
|
| 748 |
+
# The filters are a list of the form:
|
| 749 |
+
# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
|
| 750 |
+
# info on how filters are used. If FILTER_PATTERNS is empty or if
|
| 751 |
+
# none of the patterns match the file name, INPUT_FILTER is applied.
|
| 752 |
+
|
| 753 |
+
FILTER_PATTERNS =
|
| 754 |
+
|
| 755 |
+
# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
|
| 756 |
+
# INPUT_FILTER) will be used to filter the input files when producing source
|
| 757 |
+
# files to browse (i.e. when SOURCE_BROWSER is set to YES).
|
| 758 |
+
|
| 759 |
+
FILTER_SOURCE_FILES = NO
|
| 760 |
+
|
| 761 |
+
# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
|
| 762 |
+
# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
|
| 763 |
+
# and it is also possible to disable source filtering for a specific pattern
|
| 764 |
+
# using *.ext= (so without naming a filter). This option only has effect when
|
| 765 |
+
# FILTER_SOURCE_FILES is enabled.
|
| 766 |
+
|
| 767 |
+
FILTER_SOURCE_PATTERNS =
|
| 768 |
+
|
| 769 |
+
#---------------------------------------------------------------------------
|
| 770 |
+
# configuration options related to source browsing
|
| 771 |
+
#---------------------------------------------------------------------------
|
| 772 |
+
|
| 773 |
+
# If the SOURCE_BROWSER tag is set to YES then a list of source files will
|
| 774 |
+
# be generated. Documented entities will be cross-referenced with these sources.
|
| 775 |
+
# Note: To get rid of all source code in the generated output, make sure also
|
| 776 |
+
# VERBATIM_HEADERS is set to NO.
|
| 777 |
+
|
| 778 |
+
SOURCE_BROWSER = NO
|
| 779 |
+
|
| 780 |
+
# Setting the INLINE_SOURCES tag to YES will include the body
|
| 781 |
+
# of functions and classes directly in the documentation.
|
| 782 |
+
|
| 783 |
+
INLINE_SOURCES = NO
|
| 784 |
+
|
| 785 |
+
# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
|
| 786 |
+
# doxygen to hide any special comment blocks from generated source code
|
| 787 |
+
# fragments. Normal C and C++ comments will always remain visible.
|
| 788 |
+
|
| 789 |
+
STRIP_CODE_COMMENTS = YES
|
| 790 |
+
|
| 791 |
+
# If the REFERENCED_BY_RELATION tag is set to YES
|
| 792 |
+
# then for each documented function all documented
|
| 793 |
+
# functions referencing it will be listed.
|
| 794 |
+
|
| 795 |
+
REFERENCED_BY_RELATION = NO
|
| 796 |
+
|
| 797 |
+
# If the REFERENCES_RELATION tag is set to YES
|
| 798 |
+
# then for each documented function all documented entities
|
| 799 |
+
# called/used by that function will be listed.
|
| 800 |
+
|
| 801 |
+
REFERENCES_RELATION = NO
|
| 802 |
+
|
| 803 |
+
# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
|
| 804 |
+
# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
|
| 805 |
+
# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
|
| 806 |
+
# link to the source code.
|
| 807 |
+
# Otherwise they will link to the documentation.
|
| 808 |
+
|
| 809 |
+
REFERENCES_LINK_SOURCE = YES
|
| 810 |
+
|
| 811 |
+
# If the USE_HTAGS tag is set to YES then the references to source code
|
| 812 |
+
# will point to the HTML generated by the htags(1) tool instead of doxygen
|
| 813 |
+
# built-in source browser. The htags tool is part of GNU's global source
|
| 814 |
+
# tagging system (see http://www.gnu.org/software/global/global.html). You
|
| 815 |
+
# will need version 4.8.6 or higher.
|
| 816 |
+
|
| 817 |
+
USE_HTAGS = NO
|
| 818 |
+
|
| 819 |
+
# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
|
| 820 |
+
# will generate a verbatim copy of the header file for each class for
|
| 821 |
+
# which an include is specified. Set to NO to disable this.
|
| 822 |
+
|
| 823 |
+
VERBATIM_HEADERS = YES
|
| 824 |
+
|
| 825 |
+
#---------------------------------------------------------------------------
|
| 826 |
+
# configuration options related to the alphabetical class index
|
| 827 |
+
#---------------------------------------------------------------------------
|
| 828 |
+
|
| 829 |
+
# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
|
| 830 |
+
# of all compounds will be generated. Enable this if the project
|
| 831 |
+
# contains a lot of classes, structs, unions or interfaces.
|
| 832 |
+
|
| 833 |
+
ALPHABETICAL_INDEX = YES
|
| 834 |
+
|
| 835 |
+
# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
|
| 836 |
+
# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
|
| 837 |
+
# in which this list will be split (can be a number in the range [1..20])
|
| 838 |
+
|
| 839 |
+
COLS_IN_ALPHA_INDEX = 5
|
| 840 |
+
|
| 841 |
+
# In case all classes in a project start with a common prefix, all
|
| 842 |
+
# classes will be put under the same header in the alphabetical index.
|
| 843 |
+
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
|
| 844 |
+
# should be ignored while generating the index headers.
|
| 845 |
+
|
| 846 |
+
IGNORE_PREFIX =
|
| 847 |
+
|
| 848 |
+
#---------------------------------------------------------------------------
|
| 849 |
+
# configuration options related to the HTML output
|
| 850 |
+
#---------------------------------------------------------------------------
|
| 851 |
+
|
| 852 |
+
# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
|
| 853 |
+
# generate HTML output.
|
| 854 |
+
|
| 855 |
+
GENERATE_HTML = YES
|
| 856 |
+
|
| 857 |
+
# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
|
| 858 |
+
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
|
| 859 |
+
# put in front of it. If left blank `html' will be used as the default path.
|
| 860 |
+
|
| 861 |
+
HTML_OUTPUT = html
|
| 862 |
+
|
| 863 |
+
# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
|
| 864 |
+
# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
|
| 865 |
+
# doxygen will generate files with .html extension.
|
| 866 |
+
|
| 867 |
+
HTML_FILE_EXTENSION = .html
|
| 868 |
+
|
| 869 |
+
# The HTML_HEADER tag can be used to specify a personal HTML header for
|
| 870 |
+
# each generated HTML page. If it is left blank doxygen will generate a
|
| 871 |
+
# standard header. Note that when using a custom header you are responsible
|
| 872 |
+
# for the proper inclusion of any scripts and style sheets that doxygen
|
| 873 |
+
# needs, which is dependent on the configuration options used.
|
| 874 |
+
# It is advised to generate a default header using "doxygen -w html
|
| 875 |
+
# header.html footer.html stylesheet.css YourConfigFile" and then modify
|
| 876 |
+
# that header. Note that the header is subject to change so you typically
|
| 877 |
+
# have to redo this when upgrading to a newer version of doxygen or when
|
| 878 |
+
# changing the value of configuration settings such as GENERATE_TREEVIEW!
|
| 879 |
+
|
| 880 |
+
HTML_HEADER =
|
| 881 |
+
|
| 882 |
+
# The HTML_FOOTER tag can be used to specify a personal HTML footer for
|
| 883 |
+
# each generated HTML page. If it is left blank doxygen will generate a
|
| 884 |
+
# standard footer.
|
| 885 |
+
|
| 886 |
+
HTML_FOOTER =
|
| 887 |
+
|
| 888 |
+
# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
|
| 889 |
+
# style sheet that is used by each HTML page. It can be used to
|
| 890 |
+
# fine-tune the look of the HTML output. If the tag is left blank doxygen
|
| 891 |
+
# will generate a default style sheet. Note that doxygen will try to copy
|
| 892 |
+
# the style sheet file to the HTML output directory, so don't put your own
|
| 893 |
+
# style sheet in the HTML output directory as well, or it will be erased!
|
| 894 |
+
|
| 895 |
+
HTML_STYLESHEET =
|
| 896 |
+
|
| 897 |
+
# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
|
| 898 |
+
# other source files which should be copied to the HTML output directory. Note
|
| 899 |
+
# that these files will be copied to the base HTML output directory. Use the
|
| 900 |
+
# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
|
| 901 |
+
# files. In the HTML_STYLESHEET file, use the file name only. Also note that
|
| 902 |
+
# the files will be copied as-is; there are no commands or markers available.
|
| 903 |
+
|
| 904 |
+
HTML_EXTRA_FILES =
|
| 905 |
+
|
| 906 |
+
# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
|
| 907 |
+
# Doxygen will adjust the colors in the style sheet and background images
|
| 908 |
+
# according to this color. Hue is specified as an angle on a colorwheel,
|
| 909 |
+
# see http://en.wikipedia.org/wiki/Hue for more information.
|
| 910 |
+
# For instance the value 0 represents red, 60 is yellow, 120 is green,
|
| 911 |
+
# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
|
| 912 |
+
# The allowed range is 0 to 359.
|
| 913 |
+
|
| 914 |
+
HTML_COLORSTYLE_HUE = 220
|
| 915 |
+
|
| 916 |
+
# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
|
| 917 |
+
# the colors in the HTML output. For a value of 0 the output will use
|
| 918 |
+
# grayscales only. A value of 255 will produce the most vivid colors.
|
| 919 |
+
|
| 920 |
+
HTML_COLORSTYLE_SAT = 100
|
| 921 |
+
|
| 922 |
+
# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
|
| 923 |
+
# the luminance component of the colors in the HTML output. Values below
|
| 924 |
+
# 100 gradually make the output lighter, whereas values above 100 make
|
| 925 |
+
# the output darker. The value divided by 100 is the actual gamma applied,
|
| 926 |
+
# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2,
|
| 927 |
+
# and 100 does not change the gamma.
|
| 928 |
+
|
| 929 |
+
HTML_COLORSTYLE_GAMMA = 80
|
| 930 |
+
|
| 931 |
+
# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
|
| 932 |
+
# page will contain the date and time when the page was generated. Setting
|
| 933 |
+
# this to NO can help when comparing the output of multiple runs.
|
| 934 |
+
|
| 935 |
+
HTML_TIMESTAMP = YES
|
| 936 |
+
|
| 937 |
+
# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
|
| 938 |
+
# files or namespaces will be aligned in HTML using tables. If set to
|
| 939 |
+
# NO a bullet list will be used.
|
| 940 |
+
|
| 941 |
+
HTML_ALIGN_MEMBERS = YES
|
| 942 |
+
|
| 943 |
+
# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
|
| 944 |
+
# documentation will contain sections that can be hidden and shown after the
|
| 945 |
+
# page has loaded. For this to work a browser that supports
|
| 946 |
+
# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
|
| 947 |
+
# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
|
| 948 |
+
|
| 949 |
+
HTML_DYNAMIC_SECTIONS = NO
|
| 950 |
+
|
| 951 |
+
# If the GENERATE_DOCSET tag is set to YES, additional index files
|
| 952 |
+
# will be generated that can be used as input for Apple's Xcode 3
|
| 953 |
+
# integrated development environment, introduced with OSX 10.5 (Leopard).
|
| 954 |
+
# To create a documentation set, doxygen will generate a Makefile in the
|
| 955 |
+
# HTML output directory. Running make will produce the docset in that
|
| 956 |
+
# directory and running "make install" will install the docset in
|
| 957 |
+
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
|
| 958 |
+
# it at startup.
|
| 959 |
+
# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
|
| 960 |
+
# for more information.
|
| 961 |
+
|
| 962 |
+
GENERATE_DOCSET = NO
|
| 963 |
+
|
| 964 |
+
# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
|
| 965 |
+
# feed. A documentation feed provides an umbrella under which multiple
|
| 966 |
+
# documentation sets from a single provider (such as a company or product suite)
|
| 967 |
+
# can be grouped.
|
| 968 |
+
|
| 969 |
+
DOCSET_FEEDNAME = "Doxygen generated docs"
|
| 970 |
+
|
| 971 |
+
# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
|
| 972 |
+
# should uniquely identify the documentation set bundle. This should be a
|
| 973 |
+
# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
|
| 974 |
+
# will append .docset to the name.
|
| 975 |
+
|
| 976 |
+
DOCSET_BUNDLE_ID = org.doxygen.Project
|
| 977 |
+
|
| 978 |
+
# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify
|
| 979 |
+
# the documentation publisher. This should be a reverse domain-name style
|
| 980 |
+
# string, e.g. com.mycompany.MyDocSet.documentation.
|
| 981 |
+
|
| 982 |
+
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
|
| 983 |
+
|
| 984 |
+
# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher.
|
| 985 |
+
|
| 986 |
+
DOCSET_PUBLISHER_NAME = Publisher
|
| 987 |
+
|
| 988 |
+
# If the GENERATE_HTMLHELP tag is set to YES, additional index files
|
| 989 |
+
# will be generated that can be used as input for tools like the
|
| 990 |
+
# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
|
| 991 |
+
# of the generated HTML documentation.
|
| 992 |
+
|
| 993 |
+
GENERATE_HTMLHELP = NO
|
| 994 |
+
|
| 995 |
+
# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
|
| 996 |
+
# be used to specify the file name of the resulting .chm file. You
|
| 997 |
+
# can add a path in front of the file if the result should not be
|
| 998 |
+
# written to the html output directory.
|
| 999 |
+
|
| 1000 |
+
CHM_FILE =
|
| 1001 |
+
|
| 1002 |
+
# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
|
| 1003 |
+
# be used to specify the location (absolute path including file name) of
|
| 1004 |
+
# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
|
| 1005 |
+
# the HTML help compiler on the generated index.hhp.
|
| 1006 |
+
|
| 1007 |
+
HHC_LOCATION =
|
| 1008 |
+
|
| 1009 |
+
# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
|
| 1010 |
+
# controls if a separate .chi index file is generated (YES) or that
|
| 1011 |
+
# it should be included in the master .chm file (NO).
|
| 1012 |
+
|
| 1013 |
+
GENERATE_CHI = NO
|
| 1014 |
+
|
| 1015 |
+
# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
|
| 1016 |
+
# is used to encode HtmlHelp index (hhk), content (hhc) and project file
|
| 1017 |
+
# content.
|
| 1018 |
+
|
| 1019 |
+
CHM_INDEX_ENCODING =
|
| 1020 |
+
|
| 1021 |
+
# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
|
| 1022 |
+
# controls whether a binary table of contents is generated (YES) or a
|
| 1023 |
+
# normal table of contents (NO) in the .chm file.
|
| 1024 |
+
|
| 1025 |
+
BINARY_TOC = NO
|
| 1026 |
+
|
| 1027 |
+
# The TOC_EXPAND flag can be set to YES to add extra items for group members
|
| 1028 |
+
# to the contents of the HTML help documentation and to the tree view.
|
| 1029 |
+
|
| 1030 |
+
TOC_EXPAND = NO
|
| 1031 |
+
|
| 1032 |
+
# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
|
| 1033 |
+
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
|
| 1034 |
+
# that can be used as input for Qt's qhelpgenerator to generate a
|
| 1035 |
+
# Qt Compressed Help (.qch) of the generated HTML documentation.
|
| 1036 |
+
|
| 1037 |
+
GENERATE_QHP = NO
|
| 1038 |
+
|
| 1039 |
+
# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
|
| 1040 |
+
# be used to specify the file name of the resulting .qch file.
|
| 1041 |
+
# The path specified is relative to the HTML output folder.
|
| 1042 |
+
|
| 1043 |
+
QCH_FILE =
|
| 1044 |
+
|
| 1045 |
+
# The QHP_NAMESPACE tag specifies the namespace to use when generating
|
| 1046 |
+
# Qt Help Project output. For more information please see
|
| 1047 |
+
# http://doc.trolltech.com/qthelpproject.html#namespace
|
| 1048 |
+
|
| 1049 |
+
QHP_NAMESPACE = org.doxygen.Project
|
| 1050 |
+
|
| 1051 |
+
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
|
| 1052 |
+
# Qt Help Project output. For more information please see
|
| 1053 |
+
# http://doc.trolltech.com/qthelpproject.html#virtual-folders
|
| 1054 |
+
|
| 1055 |
+
QHP_VIRTUAL_FOLDER = doc
|
| 1056 |
+
|
| 1057 |
+
# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
|
| 1058 |
+
# add. For more information please see
|
| 1059 |
+
# http://doc.trolltech.com/qthelpproject.html#custom-filters
|
| 1060 |
+
|
| 1061 |
+
QHP_CUST_FILTER_NAME =
|
| 1062 |
+
|
| 1063 |
+
# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the
|
| 1064 |
+
# custom filter to add. For more information please see
|
| 1065 |
+
# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
|
| 1066 |
+
# Qt Help Project / Custom Filters</a>.
|
| 1067 |
+
|
| 1068 |
+
QHP_CUST_FILTER_ATTRS =
|
| 1069 |
+
|
| 1070 |
+
# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
|
| 1071 |
+
# project's
|
| 1072 |
+
# filter section matches.
|
| 1073 |
+
# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
|
| 1074 |
+
# Qt Help Project / Filter Attributes</a>.
|
| 1075 |
+
|
| 1076 |
+
QHP_SECT_FILTER_ATTRS =
|
| 1077 |
+
|
| 1078 |
+
# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
|
| 1079 |
+
# be used to specify the location of Qt's qhelpgenerator.
|
| 1080 |
+
# If non-empty doxygen will try to run qhelpgenerator on the generated
|
| 1081 |
+
# .qhp file.
|
| 1082 |
+
|
| 1083 |
+
QHG_LOCATION =
|
| 1084 |
+
|
| 1085 |
+
# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
|
| 1086 |
+
# will be generated, which together with the HTML files, form an Eclipse help
|
| 1087 |
+
# plugin. To install this plugin and make it available under the help contents
|
| 1088 |
+
# menu in Eclipse, the contents of the directory containing the HTML and XML
|
| 1089 |
+
# files needs to be copied into the plugins directory of eclipse. The name of
|
| 1090 |
+
# the directory within the plugins directory should be the same as
|
| 1091 |
+
# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
|
| 1092 |
+
# the help appears.
|
| 1093 |
+
|
| 1094 |
+
GENERATE_ECLIPSEHELP = NO
|
| 1095 |
+
|
| 1096 |
+
# A unique identifier for the eclipse help plugin. When installing the plugin
|
| 1097 |
+
# the directory name containing the HTML and XML files should also have
|
| 1098 |
+
# this name.
|
| 1099 |
+
|
| 1100 |
+
ECLIPSE_DOC_ID = org.doxygen.Project
|
| 1101 |
+
|
| 1102 |
+
# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
|
| 1103 |
+
# at top of each HTML page. The value NO (the default) enables the index and
|
| 1104 |
+
# the value YES disables it. Since the tabs have the same information as the
|
| 1105 |
+
# navigation tree you can set this option to NO if you already set
|
| 1106 |
+
# GENERATE_TREEVIEW to YES.
|
| 1107 |
+
|
| 1108 |
+
DISABLE_INDEX = NO
|
| 1109 |
+
|
| 1110 |
+
# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
|
| 1111 |
+
# structure should be generated to display hierarchical information.
|
| 1112 |
+
# If the tag value is set to YES, a side panel will be generated
|
| 1113 |
+
# containing a tree-like index structure (just like the one that
|
| 1114 |
+
# is generated for HTML Help). For this to work a browser that supports
|
| 1115 |
+
# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
|
| 1116 |
+
# Windows users are probably better off using the HTML help feature.
|
| 1117 |
+
# Since the tree basically has the same information as the tab index you
|
| 1118 |
+
# could consider to set DISABLE_INDEX to NO when enabling this option.
|
| 1119 |
+
|
| 1120 |
+
GENERATE_TREEVIEW = NO
|
| 1121 |
+
|
| 1122 |
+
# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
|
| 1123 |
+
# (range [0,1..20]) that doxygen will group on one line in the generated HTML
|
| 1124 |
+
# documentation. Note that a value of 0 will completely suppress the enum
|
| 1125 |
+
# values from appearing in the overview section.
|
| 1126 |
+
|
| 1127 |
+
ENUM_VALUES_PER_LINE = 4
|
| 1128 |
+
|
| 1129 |
+
# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
|
| 1130 |
+
# and Class Hierarchy pages using a tree view instead of an ordered list.
|
| 1131 |
+
|
| 1132 |
+
USE_INLINE_TREES = NO
|
| 1133 |
+
|
| 1134 |
+
# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
|
| 1135 |
+
# used to set the initial width (in pixels) of the frame in which the tree
|
| 1136 |
+
# is shown.
|
| 1137 |
+
|
| 1138 |
+
TREEVIEW_WIDTH = 250
|
| 1139 |
+
|
| 1140 |
+
# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
|
| 1141 |
+
# links to external symbols imported via tag files in a separate window.
|
| 1142 |
+
|
| 1143 |
+
EXT_LINKS_IN_WINDOW = NO
|
| 1144 |
+
|
| 1145 |
+
# Use this tag to change the font size of Latex formulas included
|
| 1146 |
+
# as images in the HTML documentation. The default is 10. Note that
|
| 1147 |
+
# when you change the font size after a successful doxygen run you need
|
| 1148 |
+
# to manually remove any form_*.png images from the HTML output directory
|
| 1149 |
+
# to force them to be regenerated.
|
| 1150 |
+
|
| 1151 |
+
FORMULA_FONTSIZE = 10
|
| 1152 |
+
|
| 1153 |
+
# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
|
| 1154 |
+
# generated for formulas are transparent PNGs. Transparent PNGs are
|
| 1155 |
+
# not supported properly for IE 6.0, but are supported on all modern browsers.
|
| 1156 |
+
# Note that when changing this option you need to delete any form_*.png files
|
| 1157 |
+
# in the HTML output before the changes have effect.
|
| 1158 |
+
|
| 1159 |
+
FORMULA_TRANSPARENT = YES
|
| 1160 |
+
|
| 1161 |
+
# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
|
| 1162 |
+
# (see http://www.mathjax.org) which uses client side Javascript for the
|
| 1163 |
+
# rendering instead of using prerendered bitmaps. Use this if you do not
|
| 1164 |
+
# have LaTeX installed or if you want the formulas to look prettier in the HTML
|
| 1165 |
+
# output. When enabled you also need to install MathJax separately and
|
| 1166 |
+
# configure the path to it using the MATHJAX_RELPATH option.
|
| 1167 |
+
|
| 1168 |
+
USE_MATHJAX = NO
|
| 1169 |
+
|
| 1170 |
+
# When MathJax is enabled you need to specify the location relative to the
|
| 1171 |
+
# HTML output directory using the MATHJAX_RELPATH option. The destination
|
| 1172 |
+
# directory should contain the MathJax.js script. For instance, if the mathjax
|
| 1173 |
+
# directory is located at the same level as the HTML output directory, then
|
| 1174 |
+
# MATHJAX_RELPATH should be ../mathjax. The default value points to the
|
| 1175 |
+
# mathjax.org site, so you can quickly see the result without installing
|
| 1176 |
+
# MathJax, but it is strongly recommended to install a local copy of MathJax
|
| 1177 |
+
# before deployment.
|
| 1178 |
+
|
| 1179 |
+
MATHJAX_RELPATH = http://www.mathjax.org/mathjax
|
| 1180 |
+
|
| 1181 |
+
# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
|
| 1182 |
+
# names that should be enabled during MathJax rendering.
|
| 1183 |
+
|
| 1184 |
+
MATHJAX_EXTENSIONS =
|
| 1185 |
+
|
| 1186 |
+
# When the SEARCHENGINE tag is enabled doxygen will generate a search box
|
| 1187 |
+
# for the HTML output. The underlying search engine uses javascript
|
| 1188 |
+
# and DHTML and should work on any modern browser. Note that when using
|
| 1189 |
+
# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
|
| 1190 |
+
# (GENERATE_DOCSET) there is already a search function so this one should
|
| 1191 |
+
# typically be disabled. For large projects the javascript based search engine
|
| 1192 |
+
# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
|
| 1193 |
+
|
| 1194 |
+
SEARCHENGINE = YES
|
| 1195 |
+
|
| 1196 |
+
# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
|
| 1197 |
+
# implemented using a PHP enabled web server instead of at the web client
|
| 1198 |
+
# using Javascript. Doxygen will generate the search PHP script and index
|
| 1199 |
+
# file to put on the web server. The advantage of the server
|
| 1200 |
+
# based approach is that it scales better to large projects and allows
|
| 1201 |
+
# full text search. The disadvantages are that it is more difficult to setup
|
| 1202 |
+
# and does not have live searching capabilities.
|
| 1203 |
+
|
| 1204 |
+
SERVER_BASED_SEARCH = NO
|
| 1205 |
+
|
| 1206 |
+
#---------------------------------------------------------------------------
|
| 1207 |
+
# configuration options related to the LaTeX output
|
| 1208 |
+
#---------------------------------------------------------------------------
|
| 1209 |
+
|
| 1210 |
+
# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
|
| 1211 |
+
# generate Latex output.
|
| 1212 |
+
|
| 1213 |
+
GENERATE_LATEX = NO
|
| 1214 |
+
|
| 1215 |
+
# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
|
| 1216 |
+
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
|
| 1217 |
+
# put in front of it. If left blank `latex' will be used as the default path.
|
| 1218 |
+
|
| 1219 |
+
LATEX_OUTPUT = latex
|
| 1220 |
+
|
| 1221 |
+
# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
|
| 1222 |
+
# invoked. If left blank `latex' will be used as the default command name.
|
| 1223 |
+
# Note that when enabling USE_PDFLATEX this option is only used for
|
| 1224 |
+
# generating bitmaps for formulas in the HTML output, but not in the
|
| 1225 |
+
# Makefile that is written to the output directory.
|
| 1226 |
+
|
| 1227 |
+
LATEX_CMD_NAME = latex
|
| 1228 |
+
|
| 1229 |
+
# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
|
| 1230 |
+
# generate index for LaTeX. If left blank `makeindex' will be used as the
|
| 1231 |
+
# default command name.
|
| 1232 |
+
|
| 1233 |
+
MAKEINDEX_CMD_NAME = makeindex
|
| 1234 |
+
|
| 1235 |
+
# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
|
| 1236 |
+
# LaTeX documents. This may be useful for small projects and may help to
|
| 1237 |
+
# save some trees in general.
|
| 1238 |
+
|
| 1239 |
+
COMPACT_LATEX = NO
|
| 1240 |
+
|
| 1241 |
+
# The PAPER_TYPE tag can be used to set the paper type that is used
|
| 1242 |
+
# by the printer. Possible values are: a4, letter, legal and
|
| 1243 |
+
# executive. If left blank a4wide will be used.
|
| 1244 |
+
|
| 1245 |
+
PAPER_TYPE = a4
|
| 1246 |
+
|
| 1247 |
+
# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
|
| 1248 |
+
# packages that should be included in the LaTeX output.
|
| 1249 |
+
|
| 1250 |
+
EXTRA_PACKAGES =
|
| 1251 |
+
|
| 1252 |
+
# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
|
| 1253 |
+
# the generated latex document. The header should contain everything until
|
| 1254 |
+
# the first chapter. If it is left blank doxygen will generate a
|
| 1255 |
+
# standard header. Notice: only use this tag if you know what you are doing!
|
| 1256 |
+
|
| 1257 |
+
LATEX_HEADER =
|
| 1258 |
+
|
| 1259 |
+
# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
|
| 1260 |
+
# the generated latex document. The footer should contain everything after
|
| 1261 |
+
# the last chapter. If it is left blank doxygen will generate a
|
| 1262 |
+
# standard footer. Notice: only use this tag if you know what you are doing!
|
| 1263 |
+
|
| 1264 |
+
LATEX_FOOTER =
|
| 1265 |
+
|
| 1266 |
+
# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
|
| 1267 |
+
# is prepared for conversion to pdf (using ps2pdf). The pdf file will
|
| 1268 |
+
# contain links (just like the HTML output) instead of page references
|
| 1269 |
+
# This makes the output suitable for online browsing using a pdf viewer.
|
| 1270 |
+
|
| 1271 |
+
PDF_HYPERLINKS = YES
|
| 1272 |
+
|
| 1273 |
+
# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
|
| 1274 |
+
# plain latex in the generated Makefile. Set this option to YES to get a
|
| 1275 |
+
# higher quality PDF documentation.
|
| 1276 |
+
|
| 1277 |
+
USE_PDFLATEX = YES
|
| 1278 |
+
|
| 1279 |
+
# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
|
| 1280 |
+
# command to the generated LaTeX files. This will instruct LaTeX to keep
|
| 1281 |
+
# running if errors occur, instead of asking the user for help.
|
| 1282 |
+
# This option is also used when generating formulas in HTML.
|
| 1283 |
+
|
| 1284 |
+
LATEX_BATCHMODE = NO
|
| 1285 |
+
|
| 1286 |
+
# If LATEX_HIDE_INDICES is set to YES then doxygen will not
|
| 1287 |
+
# include the index chapters (such as File Index, Compound Index, etc.)
|
| 1288 |
+
# in the output.
|
| 1289 |
+
|
| 1290 |
+
LATEX_HIDE_INDICES = NO
|
| 1291 |
+
|
| 1292 |
+
# If LATEX_SOURCE_CODE is set to YES then doxygen will include
|
| 1293 |
+
# source code with syntax highlighting in the LaTeX output.
|
| 1294 |
+
# Note that which sources are shown also depends on other settings
|
| 1295 |
+
# such as SOURCE_BROWSER.
|
| 1296 |
+
|
| 1297 |
+
LATEX_SOURCE_CODE = NO
|
| 1298 |
+
|
| 1299 |
+
# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
|
| 1300 |
+
# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
|
| 1301 |
+
# http://en.wikipedia.org/wiki/BibTeX for more info.
|
| 1302 |
+
|
| 1303 |
+
LATEX_BIB_STYLE = plain
|
| 1304 |
+
|
| 1305 |
+
#---------------------------------------------------------------------------
|
| 1306 |
+
# configuration options related to the RTF output
|
| 1307 |
+
#---------------------------------------------------------------------------
|
| 1308 |
+
|
| 1309 |
+
# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
|
| 1310 |
+
# The RTF output is optimized for Word 97 and may not look very pretty with
|
| 1311 |
+
# other RTF readers or editors.
|
| 1312 |
+
|
| 1313 |
+
GENERATE_RTF = NO
|
| 1314 |
+
|
| 1315 |
+
# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
|
| 1316 |
+
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
|
| 1317 |
+
# put in front of it. If left blank `rtf' will be used as the default path.
|
| 1318 |
+
|
| 1319 |
+
RTF_OUTPUT = rtf
|
| 1320 |
+
|
| 1321 |
+
# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
|
| 1322 |
+
# RTF documents. This may be useful for small projects and may help to
|
| 1323 |
+
# save some trees in general.
|
| 1324 |
+
|
| 1325 |
+
COMPACT_RTF = NO
|
| 1326 |
+
|
| 1327 |
+
# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
|
| 1328 |
+
# will contain hyperlink fields. The RTF file will
|
| 1329 |
+
# contain links (just like the HTML output) instead of page references.
|
| 1330 |
+
# This makes the output suitable for online browsing using WORD or other
|
| 1331 |
+
# programs which support those fields.
|
| 1332 |
+
# Note: wordpad (write) and others do not support links.
|
| 1333 |
+
|
| 1334 |
+
RTF_HYPERLINKS = NO
|
| 1335 |
+
|
| 1336 |
+
# Load style sheet definitions from file. Syntax is similar to doxygen's
|
| 1337 |
+
# config file, i.e. a series of assignments. You only have to provide
|
| 1338 |
+
# replacements, missing definitions are set to their default value.
|
| 1339 |
+
|
| 1340 |
+
RTF_STYLESHEET_FILE =
|
| 1341 |
+
|
| 1342 |
+
# Set optional variables used in the generation of an rtf document.
|
| 1343 |
+
# Syntax is similar to doxygen's config file.
|
| 1344 |
+
|
| 1345 |
+
RTF_EXTENSIONS_FILE =
|
| 1346 |
+
|
| 1347 |
+
#---------------------------------------------------------------------------
|
| 1348 |
+
# configuration options related to the man page output
|
| 1349 |
+
#---------------------------------------------------------------------------
|
| 1350 |
+
|
| 1351 |
+
# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
|
| 1352 |
+
# generate man pages
|
| 1353 |
+
|
| 1354 |
+
GENERATE_MAN = NO
|
| 1355 |
+
|
| 1356 |
+
# The MAN_OUTPUT tag is used to specify where the man pages will be put.
|
| 1357 |
+
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
|
| 1358 |
+
# put in front of it. If left blank `man' will be used as the default path.
|
| 1359 |
+
|
| 1360 |
+
MAN_OUTPUT = man
|
| 1361 |
+
|
| 1362 |
+
# The MAN_EXTENSION tag determines the extension that is added to
|
| 1363 |
+
# the generated man pages (default is the subroutine's section .3)
|
| 1364 |
+
|
| 1365 |
+
MAN_EXTENSION = .3
|
| 1366 |
+
|
| 1367 |
+
# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
|
| 1368 |
+
# then it will generate one additional man file for each entity
|
| 1369 |
+
# documented in the real man page(s). These additional files
|
| 1370 |
+
# only source the real man page, but without them the man command
|
| 1371 |
+
# would be unable to find the correct page. The default is NO.
|
| 1372 |
+
|
| 1373 |
+
MAN_LINKS = NO
|
| 1374 |
+
|
| 1375 |
+
#---------------------------------------------------------------------------
|
| 1376 |
+
# configuration options related to the XML output
|
| 1377 |
+
#---------------------------------------------------------------------------
|
| 1378 |
+
|
| 1379 |
+
# If the GENERATE_XML tag is set to YES Doxygen will
|
| 1380 |
+
# generate an XML file that captures the structure of
|
| 1381 |
+
# the code including all documentation.
|
| 1382 |
+
|
| 1383 |
+
GENERATE_XML = NO
|
| 1384 |
+
|
| 1385 |
+
# The XML_OUTPUT tag is used to specify where the XML pages will be put.
|
| 1386 |
+
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
|
| 1387 |
+
# put in front of it. If left blank `xml' will be used as the default path.
|
| 1388 |
+
|
| 1389 |
+
XML_OUTPUT = xml
|
| 1390 |
+
|
| 1391 |
+
# The XML_SCHEMA tag can be used to specify an XML schema,
|
| 1392 |
+
# which can be used by a validating XML parser to check the
|
| 1393 |
+
# syntax of the XML files.
|
| 1394 |
+
|
| 1395 |
+
XML_SCHEMA =
|
| 1396 |
+
|
| 1397 |
+
# The XML_DTD tag can be used to specify an XML DTD,
|
| 1398 |
+
# which can be used by a validating XML parser to check the
|
| 1399 |
+
# syntax of the XML files.
|
| 1400 |
+
|
| 1401 |
+
XML_DTD =
|
| 1402 |
+
|
| 1403 |
+
# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
|
| 1404 |
+
# dump the program listings (including syntax highlighting
|
| 1405 |
+
# and cross-referencing information) to the XML output. Note that
|
| 1406 |
+
# enabling this will significantly increase the size of the XML output.
|
| 1407 |
+
|
| 1408 |
+
XML_PROGRAMLISTING = YES
|
| 1409 |
+
|
| 1410 |
+
#---------------------------------------------------------------------------
|
| 1411 |
+
# configuration options for the AutoGen Definitions output
|
| 1412 |
+
#---------------------------------------------------------------------------
|
| 1413 |
+
|
| 1414 |
+
# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
|
| 1415 |
+
# generate an AutoGen Definitions (see autogen.sf.net) file
|
| 1416 |
+
# that captures the structure of the code including all
|
| 1417 |
+
# documentation. Note that this feature is still experimental
|
| 1418 |
+
# and incomplete at the moment.
|
| 1419 |
+
|
| 1420 |
+
GENERATE_AUTOGEN_DEF = NO
|
| 1421 |
+
|
| 1422 |
+
#---------------------------------------------------------------------------
|
| 1423 |
+
# configuration options related to the Perl module output
|
| 1424 |
+
#---------------------------------------------------------------------------
|
| 1425 |
+
|
| 1426 |
+
# If the GENERATE_PERLMOD tag is set to YES Doxygen will
|
| 1427 |
+
# generate a Perl module file that captures the structure of
|
| 1428 |
+
# the code including all documentation. Note that this
|
| 1429 |
+
# feature is still experimental and incomplete at the
|
| 1430 |
+
# moment.
|
| 1431 |
+
|
| 1432 |
+
GENERATE_PERLMOD = NO
|
| 1433 |
+
|
| 1434 |
+
# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
|
| 1435 |
+
# the necessary Makefile rules, Perl scripts and LaTeX code to be able
|
| 1436 |
+
# to generate PDF and DVI output from the Perl module output.
|
| 1437 |
+
|
| 1438 |
+
PERLMOD_LATEX = NO
|
| 1439 |
+
|
| 1440 |
+
# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
|
| 1441 |
+
# nicely formatted so it can be parsed by a human reader.
|
| 1442 |
+
# This is useful
|
| 1443 |
+
# if you want to understand what is going on.
|
| 1444 |
+
# On the other hand, if this
|
| 1445 |
+
# tag is set to NO the size of the Perl module output will be much smaller
|
| 1446 |
+
# and Perl will parse it just the same.
|
| 1447 |
+
|
| 1448 |
+
PERLMOD_PRETTY = YES
|
| 1449 |
+
|
| 1450 |
+
# The names of the make variables in the generated doxyrules.make file
|
| 1451 |
+
# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
|
| 1452 |
+
# This is useful so different doxyrules.make files included by the same
|
| 1453 |
+
# Makefile don't overwrite each other's variables.
|
| 1454 |
+
|
| 1455 |
+
PERLMOD_MAKEVAR_PREFIX =
|
| 1456 |
+
|
| 1457 |
+
#---------------------------------------------------------------------------
|
| 1458 |
+
# Configuration options related to the preprocessor
|
| 1459 |
+
#---------------------------------------------------------------------------
|
| 1460 |
+
|
| 1461 |
+
# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
|
| 1462 |
+
# evaluate all C-preprocessor directives found in the sources and include
|
| 1463 |
+
# files.
|
| 1464 |
+
|
| 1465 |
+
ENABLE_PREPROCESSING = YES
|
| 1466 |
+
|
| 1467 |
+
# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
|
| 1468 |
+
# names in the source code. If set to NO (the default) only conditional
|
| 1469 |
+
# compilation will be performed. Macro expansion can be done in a controlled
|
| 1470 |
+
# way by setting EXPAND_ONLY_PREDEF to YES.
|
| 1471 |
+
|
| 1472 |
+
MACRO_EXPANSION = NO
|
| 1473 |
+
|
| 1474 |
+
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
|
| 1475 |
+
# then the macro expansion is limited to the macros specified with the
|
| 1476 |
+
# PREDEFINED and EXPAND_AS_DEFINED tags.
|
| 1477 |
+
|
| 1478 |
+
EXPAND_ONLY_PREDEF = NO
|
| 1479 |
+
|
| 1480 |
+
# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
|
| 1481 |
+
# pointed to by INCLUDE_PATH will be searched when a #include is found.
|
| 1482 |
+
|
| 1483 |
+
SEARCH_INCLUDES = YES
|
| 1484 |
+
|
| 1485 |
+
# The INCLUDE_PATH tag can be used to specify one or more directories that
|
| 1486 |
+
# contain include files that are not input files but should be processed by
|
| 1487 |
+
# the preprocessor.
|
| 1488 |
+
|
| 1489 |
+
INCLUDE_PATH =
|
| 1490 |
+
|
| 1491 |
+
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
|
| 1492 |
+
# patterns (like *.h and *.hpp) to filter out the header-files in the
|
| 1493 |
+
# directories. If left blank, the patterns specified with FILE_PATTERNS will
|
| 1494 |
+
# be used.
|
| 1495 |
+
|
| 1496 |
+
INCLUDE_FILE_PATTERNS =
|
| 1497 |
+
|
| 1498 |
+
# The PREDEFINED tag can be used to specify one or more macro names that
|
| 1499 |
+
# are defined before the preprocessor is started (similar to the -D option of
|
| 1500 |
+
# gcc). The argument of the tag is a list of macros of the form: name
|
| 1501 |
+
# or name=definition (no spaces). If the definition and the = are
|
| 1502 |
+
# omitted =1 is assumed. To prevent a macro definition from being
|
| 1503 |
+
# undefined via #undef or recursively expanded use the := operator
|
| 1504 |
+
# instead of the = operator.
|
| 1505 |
+
|
| 1506 |
+
PREDEFINED =
|
| 1507 |
+
|
| 1508 |
+
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
|
| 1509 |
+
# this tag can be used to specify a list of macro names that should be expanded.
|
| 1510 |
+
# The macro definition that is found in the sources will be used.
|
| 1511 |
+
# Use the PREDEFINED tag if you want to use a different macro definition that
|
| 1512 |
+
# overrules the definition found in the source code.
|
| 1513 |
+
|
| 1514 |
+
EXPAND_AS_DEFINED =
|
| 1515 |
+
|
| 1516 |
+
# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
|
| 1517 |
+
# doxygen's preprocessor will remove all references to function-like macros
|
| 1518 |
+
# that are alone on a line, have an all uppercase name, and do not end with a
|
| 1519 |
+
# semicolon, because these will confuse the parser if not removed.
|
| 1520 |
+
|
| 1521 |
+
SKIP_FUNCTION_MACROS = YES
|
| 1522 |
+
|
| 1523 |
+
#---------------------------------------------------------------------------
|
| 1524 |
+
# Configuration::additions related to external references
|
| 1525 |
+
#---------------------------------------------------------------------------
|
| 1526 |
+
|
| 1527 |
+
# The TAGFILES option can be used to specify one or more tagfiles.
|
| 1528 |
+
# Optionally an initial location of the external documentation
|
| 1529 |
+
# can be added for each tagfile. The format of a tag file without
|
| 1530 |
+
# this location is as follows:
|
| 1531 |
+
#
|
| 1532 |
+
# TAGFILES = file1 file2 ...
|
| 1533 |
+
# Adding location for the tag files is done as follows:
|
| 1534 |
+
#
|
| 1535 |
+
# TAGFILES = file1=loc1 "file2 = loc2" ...
|
| 1536 |
+
# where "loc1" and "loc2" can be relative or absolute paths or
|
| 1537 |
+
# URLs. If a location is present for each tag, the installdox tool
|
| 1538 |
+
# does not have to be run to correct the links.
|
| 1539 |
+
# Note that each tag file must have a unique name
|
| 1540 |
+
# (where the name does NOT include the path)
|
| 1541 |
+
# If a tag file is not located in the directory in which doxygen
|
| 1542 |
+
# is run, you must also specify the path to the tagfile here.
|
| 1543 |
+
|
| 1544 |
+
TAGFILES =
|
| 1545 |
+
|
| 1546 |
+
# When a file name is specified after GENERATE_TAGFILE, doxygen will create
|
| 1547 |
+
# a tag file that is based on the input files it reads.
|
| 1548 |
+
|
| 1549 |
+
GENERATE_TAGFILE =
|
| 1550 |
+
|
| 1551 |
+
# If the ALLEXTERNALS tag is set to YES all external classes will be listed
|
| 1552 |
+
# in the class index. If set to NO only the inherited external classes
|
| 1553 |
+
# will be listed.
|
| 1554 |
+
|
| 1555 |
+
ALLEXTERNALS = NO
|
| 1556 |
+
|
| 1557 |
+
# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
|
| 1558 |
+
# in the modules index. If set to NO, only the current project's groups will
|
| 1559 |
+
# be listed.
|
| 1560 |
+
|
| 1561 |
+
EXTERNAL_GROUPS = YES
|
| 1562 |
+
|
| 1563 |
+
# The PERL_PATH should be the absolute path and name of the perl script
|
| 1564 |
+
# interpreter (i.e. the result of `which perl').
|
| 1565 |
+
|
| 1566 |
+
PERL_PATH = /usr/bin/perl
|
| 1567 |
+
|
| 1568 |
+
#---------------------------------------------------------------------------
|
| 1569 |
+
# Configuration options related to the dot tool
|
| 1570 |
+
#---------------------------------------------------------------------------
|
| 1571 |
+
|
| 1572 |
+
# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
|
| 1573 |
+
# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
|
| 1574 |
+
# or super classes. Setting the tag to NO turns the diagrams off. Note that
|
| 1575 |
+
# this option also works with HAVE_DOT disabled, but it is recommended to
|
| 1576 |
+
# install and use dot, since it yields more powerful graphs.
|
| 1577 |
+
|
| 1578 |
+
CLASS_DIAGRAMS = YES
|
| 1579 |
+
|
| 1580 |
+
# You can define message sequence charts within doxygen comments using the \msc
|
| 1581 |
+
# command. Doxygen will then run the mscgen tool (see
|
| 1582 |
+
# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
|
| 1583 |
+
# documentation. The MSCGEN_PATH tag allows you to specify the directory where
|
| 1584 |
+
# the mscgen tool resides. If left empty the tool is assumed to be found in the
|
| 1585 |
+
# default search path.
|
| 1586 |
+
|
| 1587 |
+
MSCGEN_PATH =
|
| 1588 |
+
|
| 1589 |
+
# If set to YES, the inheritance and collaboration graphs will hide
|
| 1590 |
+
# inheritance and usage relations if the target is undocumented
|
| 1591 |
+
# or is not a class.
|
| 1592 |
+
|
| 1593 |
+
HIDE_UNDOC_RELATIONS = YES
|
| 1594 |
+
|
| 1595 |
+
# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
|
| 1596 |
+
# available from the path. This tool is part of Graphviz, a graph visualization
|
| 1597 |
+
# toolkit from AT&T and Lucent Bell Labs. The other options in this section
|
| 1598 |
+
# have no effect if this option is set to NO (the default)
|
| 1599 |
+
|
| 1600 |
+
HAVE_DOT = YES
|
| 1601 |
+
|
| 1602 |
+
# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
|
| 1603 |
+
# allowed to run in parallel. When set to 0 (the default) doxygen will
|
| 1604 |
+
# base this on the number of processors available in the system. You can set it
|
| 1605 |
+
# explicitly to a value larger than 0 to get control over the balance
|
| 1606 |
+
# between CPU load and processing speed.
|
| 1607 |
+
|
| 1608 |
+
DOT_NUM_THREADS = 0
|
| 1609 |
+
|
| 1610 |
+
# By default doxygen will use the Helvetica font for all dot files that
|
| 1611 |
+
# doxygen generates. When you want a differently looking font you can specify
|
| 1612 |
+
# the font name using DOT_FONTNAME. You need to make sure dot is able to find
|
| 1613 |
+
# the font, which can be done by putting it in a standard location or by setting
|
| 1614 |
+
# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
|
| 1615 |
+
# directory containing the font.
|
| 1616 |
+
|
| 1617 |
+
DOT_FONTNAME = Helvetica
|
| 1618 |
+
|
| 1619 |
+
# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
|
| 1620 |
+
# The default size is 10pt.
|
| 1621 |
+
|
| 1622 |
+
DOT_FONTSIZE = 10
|
| 1623 |
+
|
| 1624 |
+
# By default doxygen will tell dot to use the Helvetica font.
|
| 1625 |
+
# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
|
| 1626 |
+
# set the path where dot can find it.
|
| 1627 |
+
|
| 1628 |
+
DOT_FONTPATH =
|
| 1629 |
+
|
| 1630 |
+
# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
|
| 1631 |
+
# will generate a graph for each documented class showing the direct and
|
| 1632 |
+
# indirect inheritance relations. Setting this tag to YES will force the
|
| 1633 |
+
# CLASS_DIAGRAMS tag to NO.
|
| 1634 |
+
|
| 1635 |
+
CLASS_GRAPH = YES
|
| 1636 |
+
|
| 1637 |
+
# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
|
| 1638 |
+
# will generate a graph for each documented class showing the direct and
|
| 1639 |
+
# indirect implementation dependencies (inheritance, containment, and
|
| 1640 |
+
# class references variables) of the class with other documented classes.
|
| 1641 |
+
|
| 1642 |
+
COLLABORATION_GRAPH = YES
|
| 1643 |
+
|
| 1644 |
+
# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
|
| 1645 |
+
# will generate a graph for groups, showing the direct groups dependencies
|
| 1646 |
+
|
| 1647 |
+
GROUP_GRAPHS = YES
|
| 1648 |
+
|
| 1649 |
+
# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
|
| 1650 |
+
# collaboration diagrams in a style similar to the OMG's Unified Modeling
|
| 1651 |
+
# Language.
|
| 1652 |
+
|
| 1653 |
+
UML_LOOK = NO
|
| 1654 |
+
|
| 1655 |
+
# If set to YES, the inheritance and collaboration graphs will show the
|
| 1656 |
+
# relations between templates and their instances.
|
| 1657 |
+
|
| 1658 |
+
TEMPLATE_RELATIONS = YES
|
| 1659 |
+
|
| 1660 |
+
# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
|
| 1661 |
+
# tags are set to YES then doxygen will generate a graph for each documented
|
| 1662 |
+
# file showing the direct and indirect include dependencies of the file with
|
| 1663 |
+
# other documented files.
|
| 1664 |
+
|
| 1665 |
+
INCLUDE_GRAPH = YES
|
| 1666 |
+
|
| 1667 |
+
# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
|
| 1668 |
+
# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
|
| 1669 |
+
# documented header file showing the documented files that directly or
|
| 1670 |
+
# indirectly include this file.
|
| 1671 |
+
|
| 1672 |
+
INCLUDED_BY_GRAPH = YES
|
| 1673 |
+
|
| 1674 |
+
# If the CALL_GRAPH and HAVE_DOT options are set to YES then
|
| 1675 |
+
# doxygen will generate a call dependency graph for every global function
|
| 1676 |
+
# or class method. Note that enabling this option will significantly increase
|
| 1677 |
+
# the time of a run. So in most cases it will be better to enable call graphs
|
| 1678 |
+
# for selected functions only using the \callgraph command.
|
| 1679 |
+
|
| 1680 |
+
CALL_GRAPH = NO
|
| 1681 |
+
|
| 1682 |
+
# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
|
| 1683 |
+
# doxygen will generate a caller dependency graph for every global function
|
| 1684 |
+
# or class method. Note that enabling this option will significantly increase
|
| 1685 |
+
# the time of a run. So in most cases it will be better to enable caller
|
| 1686 |
+
# graphs for selected functions only using the \callergraph command.
|
| 1687 |
+
|
| 1688 |
+
CALLER_GRAPH = NO
|
| 1689 |
+
|
| 1690 |
+
# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
|
| 1691 |
+
# will generate a graphical hierarchy of all classes instead of a textual one.
|
| 1692 |
+
|
| 1693 |
+
GRAPHICAL_HIERARCHY = YES
|
| 1694 |
+
|
| 1695 |
+
# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
|
| 1696 |
+
# then doxygen will show the dependencies a directory has on other directories
|
| 1697 |
+
# in a graphical way. The dependency relations are determined by the #include
|
| 1698 |
+
# relations between the files in the directories.
|
| 1699 |
+
|
| 1700 |
+
DIRECTORY_GRAPH = YES
|
| 1701 |
+
|
| 1702 |
+
# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
|
| 1703 |
+
# generated by dot. Possible values are svg, png, jpg, or gif.
|
| 1704 |
+
# If left blank png will be used. If you choose svg you need to set
|
| 1705 |
+
# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
|
| 1706 |
+
# visible in IE 9+ (other browsers do not have this requirement).
|
| 1707 |
+
|
| 1708 |
+
DOT_IMAGE_FORMAT = png
|
| 1709 |
+
|
| 1710 |
+
# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
|
| 1711 |
+
# enable generation of interactive SVG images that allow zooming and panning.
|
| 1712 |
+
# Note that this requires a modern browser other than Internet Explorer.
|
| 1713 |
+
# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
|
| 1714 |
+
# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
|
| 1715 |
+
# visible. Older versions of IE do not have SVG support.
|
| 1716 |
+
|
| 1717 |
+
INTERACTIVE_SVG = NO
|
| 1718 |
+
|
| 1719 |
+
# The tag DOT_PATH can be used to specify the path where the dot tool can be
|
| 1720 |
+
# found. If left blank, it is assumed the dot tool can be found in the path.
|
| 1721 |
+
|
| 1722 |
+
DOT_PATH =
|
| 1723 |
+
|
| 1724 |
+
# The DOTFILE_DIRS tag can be used to specify one or more directories that
|
| 1725 |
+
# contain dot files that are included in the documentation (see the
|
| 1726 |
+
# \dotfile command).
|
| 1727 |
+
|
| 1728 |
+
DOTFILE_DIRS =
|
| 1729 |
+
|
| 1730 |
+
# The MSCFILE_DIRS tag can be used to specify one or more directories that
|
| 1731 |
+
# contain msc files that are included in the documentation (see the
|
| 1732 |
+
# \mscfile command).
|
| 1733 |
+
|
| 1734 |
+
MSCFILE_DIRS =
|
| 1735 |
+
|
| 1736 |
+
# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
|
| 1737 |
+
# nodes that will be shown in the graph. If the number of nodes in a graph
|
| 1738 |
+
# becomes larger than this value, doxygen will truncate the graph, which is
|
| 1739 |
+
# visualized by representing a node as a red box. Note that doxygen if the
|
| 1740 |
+
# number of direct children of the root node in a graph is already larger than
|
| 1741 |
+
# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
|
| 1742 |
+
# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
|
| 1743 |
+
|
| 1744 |
+
DOT_GRAPH_MAX_NODES = 50
|
| 1745 |
+
|
| 1746 |
+
# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
|
| 1747 |
+
# graphs generated by dot. A depth value of 3 means that only nodes reachable
|
| 1748 |
+
# from the root by following a path via at most 3 edges will be shown. Nodes
|
| 1749 |
+
# that lay further from the root node will be omitted. Note that setting this
|
| 1750 |
+
# option to 1 or 2 may greatly reduce the computation time needed for large
|
| 1751 |
+
# code bases. Also note that the size of a graph can be further restricted by
|
| 1752 |
+
# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
|
| 1753 |
+
|
| 1754 |
+
MAX_DOT_GRAPH_DEPTH = 0
|
| 1755 |
+
|
| 1756 |
+
# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
|
| 1757 |
+
# background. This is disabled by default, because dot on Windows does not
|
| 1758 |
+
# seem to support this out of the box. Warning: Depending on the platform used,
|
| 1759 |
+
# enabling this option may lead to badly anti-aliased labels on the edges of
|
| 1760 |
+
# a graph (i.e. they become hard to read).
|
| 1761 |
+
|
| 1762 |
+
DOT_TRANSPARENT = NO
|
| 1763 |
+
|
| 1764 |
+
# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
|
| 1765 |
+
# files in one run (i.e. multiple -o and -T options on the command line). This
|
| 1766 |
+
# makes dot run faster, but since only newer versions of dot (>1.8.10)
|
| 1767 |
+
# support this, this feature is disabled by default.
|
| 1768 |
+
|
| 1769 |
+
DOT_MULTI_TARGETS = YES
|
| 1770 |
+
|
| 1771 |
+
# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
|
| 1772 |
+
# generate a legend page explaining the meaning of the various boxes and
|
| 1773 |
+
# arrows in the dot generated graphs.
|
| 1774 |
+
|
| 1775 |
+
GENERATE_LEGEND = YES
|
| 1776 |
+
|
| 1777 |
+
# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
|
| 1778 |
+
# remove the intermediate dot files that are used to generate
|
| 1779 |
+
# the various graphs.
|
| 1780 |
+
|
| 1781 |
+
DOT_CLEANUP = YES
|
mosesdecoder/moses-cmd/Jamfile
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
alias deps : ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
|
| 2 |
+
|
| 3 |
+
exe moses : Main.cpp deps ;
|
| 4 |
+
exe vwtrainer : MainVW.cpp deps ;
|
| 5 |
+
exe lmbrgrid : LatticeMBRGrid.cpp deps ;
|
| 6 |
+
alias programs : moses lmbrgrid vwtrainer ;
|
| 7 |
+
|
mosesdecoder/moses-cmd/LatticeMBRGrid.cpp
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (c) 2010 University of Edinburgh
|
| 6 |
+
All rights reserved.
|
| 7 |
+
|
| 8 |
+
Redistribution and use in source and binary forms, with or without modification,
|
| 9 |
+
are permitted provided that the following conditions are met:
|
| 10 |
+
|
| 11 |
+
* Redistributions of source code must retain the above copyright notice,
|
| 12 |
+
this list of conditions and the following disclaimer.
|
| 13 |
+
* Redistributions in binary form must reproduce the above copyright notice,
|
| 14 |
+
this list of conditions and the following disclaimer in the documentation
|
| 15 |
+
and/or other materials provided with the distribution.
|
| 16 |
+
* Neither the name of the University of Edinburgh nor the names of its contributors
|
| 17 |
+
may be used to endorse or promote products derived from this software
|
| 18 |
+
without specific prior written permission.
|
| 19 |
+
|
| 20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 21 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
| 22 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 23 |
+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
| 24 |
+
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 25 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 26 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| 27 |
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
| 28 |
+
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| 29 |
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 30 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 31 |
+
***********************************************************************/
|
| 32 |
+
/**
|
| 33 |
+
* Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
|
| 34 |
+
See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation by Tromble, Kumar, Och and Macherey,
|
| 35 |
+
EMNLP 2008 for details of the parameters.
|
| 36 |
+
|
| 37 |
+
The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
|
| 38 |
+
-lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
|
| 39 |
+
parameters are missing, then they are set to their default values. Output is of the form:
|
| 40 |
+
sentence-id ||| p r prune scale ||| translation-hypothesis
|
| 41 |
+
**/
|
| 42 |
+
|
| 43 |
+
#include <cstdlib>
|
| 44 |
+
#include <iostream>
|
| 45 |
+
#include <map>
|
| 46 |
+
#include <stdexcept>
|
| 47 |
+
#include <set>
|
| 48 |
+
|
| 49 |
+
#include "moses/IOWrapper.h"
|
| 50 |
+
#include "moses/LatticeMBR.h"
|
| 51 |
+
#include "moses/Manager.h"
|
| 52 |
+
#include "moses/Timer.h"
|
| 53 |
+
#include "moses/StaticData.h"
|
| 54 |
+
#include "util/exception.hh"
|
| 55 |
+
|
| 56 |
+
#include <boost/foreach.hpp>
|
| 57 |
+
#include "moses/TranslationTask.h"
|
| 58 |
+
|
| 59 |
+
using namespace std;
|
| 60 |
+
using namespace Moses;
|
| 61 |
+
|
| 62 |
+
//keys
|
| 63 |
+
enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
|
| 64 |
+
|
| 65 |
+
namespace Moses
|
| 66 |
+
{
|
| 67 |
+
|
| 68 |
+
class Grid
|
| 69 |
+
{
|
| 70 |
+
public:
|
| 71 |
+
/** Add a parameter with key, command line argument, and default value */
|
| 72 |
+
void addParam(gridkey key, const string& arg, float defaultValue) {
|
| 73 |
+
m_args[arg] = key;
|
| 74 |
+
UTIL_THROW_IF2(m_grid.find(key) != m_grid.end(),
|
| 75 |
+
"Couldn't find value for key " << (int) key);
|
| 76 |
+
m_grid[key].push_back(defaultValue);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/** Parse the arguments, removing those that define the grid and returning a copy of the rest */
|
| 80 |
+
void parseArgs(int& argc, char const**& argv) {
|
| 81 |
+
char const** newargv = new char const*[argc+1]; //Space to add mbr parameter
|
| 82 |
+
int newargc = 0;
|
| 83 |
+
for (int i = 0; i < argc; ++i) {
|
| 84 |
+
bool consumed = false;
|
| 85 |
+
for (map<string,gridkey>::const_iterator argi = m_args.begin(); argi != m_args.end(); ++argi) {
|
| 86 |
+
if (!strcmp(argv[i], argi->first.c_str())) {
|
| 87 |
+
++i;
|
| 88 |
+
if (i >= argc) {
|
| 89 |
+
cerr << "Error: missing parameter for " << argi->first << endl;
|
| 90 |
+
throw runtime_error("Missing parameter");
|
| 91 |
+
} else {
|
| 92 |
+
string value = argv[i];
|
| 93 |
+
gridkey key = argi->second;
|
| 94 |
+
if (m_grid[key].size() != 1) {
|
| 95 |
+
throw runtime_error("Duplicate grid argument");
|
| 96 |
+
}
|
| 97 |
+
m_grid[key].clear();
|
| 98 |
+
char delim = ',';
|
| 99 |
+
string::size_type lastpos = value.find_first_not_of(delim);
|
| 100 |
+
string::size_type pos = value.find_first_of(delim,lastpos);
|
| 101 |
+
while (string::npos != pos || string::npos != lastpos) {
|
| 102 |
+
float param = atof(value.substr(lastpos, pos-lastpos).c_str());
|
| 103 |
+
if (!param) {
|
| 104 |
+
cerr << "Error: Illegal grid parameter for " << argi->first << endl;
|
| 105 |
+
throw runtime_error("Illegal grid parameter");
|
| 106 |
+
}
|
| 107 |
+
m_grid[key].push_back(param);
|
| 108 |
+
lastpos = value.find_first_not_of(delim,pos);
|
| 109 |
+
pos = value.find_first_of(delim,lastpos);
|
| 110 |
+
}
|
| 111 |
+
consumed = true;
|
| 112 |
+
}
|
| 113 |
+
if (consumed) break;
|
| 114 |
+
}
|
| 115 |
+
}
|
| 116 |
+
if (!consumed) {
|
| 117 |
+
// newargv[newargc] = new char[strlen(argv[i]) + 1];
|
| 118 |
+
// strcpy(newargv[newargc],argv[i]);
|
| 119 |
+
newargv[newargc] = argv[i];
|
| 120 |
+
++newargc;
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
argc = newargc;
|
| 124 |
+
argv = newargv;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/** Get the grid for a particular key.*/
|
| 128 |
+
const vector<float>& getGrid(gridkey key) const {
|
| 129 |
+
map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
|
| 130 |
+
assert (iter != m_grid.end());
|
| 131 |
+
return iter->second;
|
| 132 |
+
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
private:
|
| 136 |
+
map<gridkey,vector<float> > m_grid;
|
| 137 |
+
map<string,gridkey> m_args;
|
| 138 |
+
};
|
| 139 |
+
|
| 140 |
+
} // namespace
|
| 141 |
+
|
| 142 |
+
int main(int argc, char const* argv[])
|
| 143 |
+
{
|
| 144 |
+
cerr << "Lattice MBR Grid search" << endl;
|
| 145 |
+
|
| 146 |
+
Grid grid;
|
| 147 |
+
grid.addParam(lmbr_p, "-lmbr-p", 0.5);
|
| 148 |
+
grid.addParam(lmbr_r, "-lmbr-r", 0.5);
|
| 149 |
+
grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
|
| 150 |
+
grid.addParam(lmbr_scale, "-mbr-scale",1.0);
|
| 151 |
+
|
| 152 |
+
grid.parseArgs(argc,argv);
|
| 153 |
+
|
| 154 |
+
Parameter* params = new Parameter();
|
| 155 |
+
if (!params->LoadParam(argc,argv)) {
|
| 156 |
+
params->Explain();
|
| 157 |
+
exit(1);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
ResetUserTime();
|
| 161 |
+
if (!StaticData::LoadDataStatic(params, argv[0])) {
|
| 162 |
+
exit(1);
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
|
| 166 |
+
boost::shared_ptr<AllOptions> opts(new AllOptions(*SD.options()));
|
| 167 |
+
LMBR_Options& lmbr = opts->lmbr;
|
| 168 |
+
MBR_Options& mbr = opts->mbr;
|
| 169 |
+
lmbr.enabled = true;
|
| 170 |
+
|
| 171 |
+
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
|
| 172 |
+
if (!ioWrapper) {
|
| 173 |
+
throw runtime_error("Failed to initialise IOWrapper");
|
| 174 |
+
}
|
| 175 |
+
size_t nBestSize = mbr.size;
|
| 176 |
+
|
| 177 |
+
if (nBestSize <= 0) {
|
| 178 |
+
throw new runtime_error("Non-positive size specified for n-best list");
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
const vector<float>& pgrid = grid.getGrid(lmbr_p);
|
| 182 |
+
const vector<float>& rgrid = grid.getGrid(lmbr_r);
|
| 183 |
+
const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
|
| 184 |
+
const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
|
| 185 |
+
|
| 186 |
+
boost::shared_ptr<InputType> source;
|
| 187 |
+
while((source = ioWrapper->ReadInput()) != NULL) {
|
| 188 |
+
// set up task of translating one sentence
|
| 189 |
+
boost::shared_ptr<TranslationTask> ttask;
|
| 190 |
+
ttask = TranslationTask::create(source, ioWrapper);
|
| 191 |
+
Manager manager(ttask);
|
| 192 |
+
manager.Decode();
|
| 193 |
+
TrellisPathList nBestList;
|
| 194 |
+
manager.CalcNBest(nBestSize, nBestList,true);
|
| 195 |
+
//grid search
|
| 196 |
+
BOOST_FOREACH(float const& p, pgrid) {
|
| 197 |
+
lmbr.precision = p;
|
| 198 |
+
BOOST_FOREACH(float const& r, rgrid) {
|
| 199 |
+
lmbr.ratio = r;
|
| 200 |
+
BOOST_FOREACH(size_t const prune_i, prune_grid) {
|
| 201 |
+
lmbr.pruning_factor = prune_i;
|
| 202 |
+
BOOST_FOREACH(float const& scale_i, scale_grid) {
|
| 203 |
+
mbr.scale = scale_i;
|
| 204 |
+
size_t lineCount = source->GetTranslationId();
|
| 205 |
+
cout << lineCount << " ||| " << p << " "
|
| 206 |
+
<< r << " " << size_t(prune_i) << " " << scale_i
|
| 207 |
+
<< " ||| ";
|
| 208 |
+
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
|
| 209 |
+
manager.OutputBestHypo(mbrBestHypo, cout);
|
| 210 |
+
}
|
| 211 |
+
}
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
}
|
mosesdecoder/moses-cmd/Main.cpp
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2009 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
/**
|
| 23 |
+
* Moses main wrapper for executable for single-threaded and multi-threaded, simply calling decoder_main.
|
| 24 |
+
**/
|
| 25 |
+
#include "moses/ExportInterface.h"
|
| 26 |
+
#include "util/string_stream.hh"
|
| 27 |
+
|
| 28 |
+
/** main function of the command line version of the decoder **/
|
| 29 |
+
int main(int argc, char const** argv)
|
| 30 |
+
{
|
| 31 |
+
return decoder_main(argc, argv);
|
| 32 |
+
}
|
| 33 |
+
|
mosesdecoder/moses-cmd/MainVW.cpp
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id: MainMT.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2009 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
/**
|
| 23 |
+
* Moses main, for single-threaded and multi-threaded.
|
| 24 |
+
**/
|
| 25 |
+
#include <exception>
|
| 26 |
+
#include <fstream>
|
| 27 |
+
#include <sstream>
|
| 28 |
+
#include <vector>
|
| 29 |
+
|
| 30 |
+
#include "util/usage.hh"
|
| 31 |
+
|
| 32 |
+
#ifdef WIN32
|
| 33 |
+
// Include Visual Leak Detector
|
| 34 |
+
//#include <vld.h>
|
| 35 |
+
#endif
|
| 36 |
+
|
| 37 |
+
#include "moses/IOWrapper.h"
|
| 38 |
+
#include "moses/Hypothesis.h"
|
| 39 |
+
#include "moses/Manager.h"
|
| 40 |
+
#include "moses/StaticData.h"
|
| 41 |
+
#include "moses/TypeDef.h"
|
| 42 |
+
#include "moses/Util.h"
|
| 43 |
+
#include "moses/Timer.h"
|
| 44 |
+
#include "moses/TranslationModel/PhraseDictionary.h"
|
| 45 |
+
#include "moses/FF/StatefulFeatureFunction.h"
|
| 46 |
+
#include "moses/FF/StatelessFeatureFunction.h"
|
| 47 |
+
#include "moses/TrainingTask.h"
|
| 48 |
+
#include "util/random.hh"
|
| 49 |
+
|
| 50 |
+
#ifdef HAVE_PROTOBUF
|
| 51 |
+
#include "hypergraph.pb.h"
|
| 52 |
+
#endif
|
| 53 |
+
|
| 54 |
+
using namespace std;
|
| 55 |
+
using namespace Moses;
|
| 56 |
+
|
| 57 |
+
namespace Moses
|
| 58 |
+
{
|
| 59 |
+
|
| 60 |
+
void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
|
| 61 |
+
{
|
| 62 |
+
outputSearchGraphStream.setf(std::ios::fixed);
|
| 63 |
+
outputSearchGraphStream.precision(6);
|
| 64 |
+
StaticData::Instance().GetAllWeights().Save(outputSearchGraphStream);
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
} //namespace
|
| 69 |
+
|
| 70 |
+
/** main function of the command line version of the decoder **/
|
| 71 |
+
int main(int argc, char const** argv)
|
| 72 |
+
{
|
| 73 |
+
//setting in the Staticdata a link between the thread id of this process and a NULL tasksptr
|
| 74 |
+
// StaticData::InstanceNonConst().SetTask(); // => moved into StaticData constructor
|
| 75 |
+
|
| 76 |
+
try {
|
| 77 |
+
|
| 78 |
+
#ifdef HAVE_PROTOBUF
|
| 79 |
+
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
| 80 |
+
#endif
|
| 81 |
+
|
| 82 |
+
// echo command line, if verbose
|
| 83 |
+
IFVERBOSE(1) {
|
| 84 |
+
TRACE_ERR("command: ");
|
| 85 |
+
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
|
| 86 |
+
TRACE_ERR(endl);
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
// set number of significant decimals in output
|
| 90 |
+
FixPrecision(cout);
|
| 91 |
+
FixPrecision(cerr);
|
| 92 |
+
|
| 93 |
+
// load all the settings into the Parameter class
|
| 94 |
+
// (stores them as strings, or array of strings)
|
| 95 |
+
Parameter params;
|
| 96 |
+
if (!params.LoadParam(argc,argv)) {
|
| 97 |
+
exit(1);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
// initialize all "global" variables, which are stored in StaticData
|
| 102 |
+
// note: this also loads models such as the language model, etc.
|
| 103 |
+
ResetUserTime();
|
| 104 |
+
if (!StaticData::LoadDataStatic(¶ms, argv[0])) {
|
| 105 |
+
exit(1);
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
// setting "-show-weights" -> just dump out weights and exit
|
| 109 |
+
if (params.isParamSpecified("show-weights")) {
|
| 110 |
+
ShowWeights();
|
| 111 |
+
exit(0);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
// shorthand for accessing information in StaticData
|
| 115 |
+
const StaticData& staticData = StaticData::Instance();
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
//initialise random numbers
|
| 119 |
+
util::rand_init();
|
| 120 |
+
|
| 121 |
+
// set up read/writing class
|
| 122 |
+
IFVERBOSE(1) {
|
| 123 |
+
PrintUserTime("Created input-output object");
|
| 124 |
+
}
|
| 125 |
+
AllOptions::ptr opts(new AllOptions(*StaticData::Instance().options()));
|
| 126 |
+
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper(*opts));
|
| 127 |
+
if (ioWrapper == NULL) {
|
| 128 |
+
cerr << "Error; Failed to create IO object" << endl;
|
| 129 |
+
exit(1);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
// check on weights
|
| 133 |
+
const ScoreComponentCollection& weights = staticData.GetAllWeights();
|
| 134 |
+
IFVERBOSE(2) {
|
| 135 |
+
TRACE_ERR("The global weight vector looks like this: ");
|
| 136 |
+
TRACE_ERR(weights);
|
| 137 |
+
TRACE_ERR("\n");
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
#ifdef WITH_THREADS
|
| 141 |
+
#pragma message ("Compiling with Threads.")
|
| 142 |
+
ThreadPool pool(staticData.ThreadCount());
|
| 143 |
+
#endif
|
| 144 |
+
|
| 145 |
+
// main loop over set of input sentences
|
| 146 |
+
|
| 147 |
+
boost::shared_ptr<ContextScope> scope(new ContextScope);
|
| 148 |
+
boost::shared_ptr<InputType> source;
|
| 149 |
+
while ((source = ioWrapper->ReadInput()) != NULL) {
|
| 150 |
+
IFVERBOSE(1) {
|
| 151 |
+
ResetUserTime();
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
// set up task of training one sentence
|
| 155 |
+
boost::shared_ptr<TrainingTask> task;
|
| 156 |
+
task = TrainingTask::create(source, ioWrapper, scope);
|
| 157 |
+
|
| 158 |
+
// execute task
|
| 159 |
+
#ifdef WITH_THREADS
|
| 160 |
+
pool.Submit(task);
|
| 161 |
+
#else
|
| 162 |
+
task->Run();
|
| 163 |
+
#endif
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
// we are done, finishing up
|
| 167 |
+
#ifdef WITH_THREADS
|
| 168 |
+
pool.Stop(true); //flush remaining jobs
|
| 169 |
+
#endif
|
| 170 |
+
|
| 171 |
+
FeatureFunction::Destroy();
|
| 172 |
+
|
| 173 |
+
} catch (const std::exception &e) {
|
| 174 |
+
std::cerr << "Exception: " << e.what() << std::endl;
|
| 175 |
+
return EXIT_FAILURE;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
IFVERBOSE(1) util::PrintUsage(std::cerr);
|
| 179 |
+
|
| 180 |
+
#ifndef EXIT_RETURN
|
| 181 |
+
//This avoids that destructors are called (it can take a long time)
|
| 182 |
+
exit(EXIT_SUCCESS);
|
| 183 |
+
#else
|
| 184 |
+
return EXIT_SUCCESS;
|
| 185 |
+
#endif
|
| 186 |
+
}
|
mosesdecoder/moses-cmd/MainVW.h
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
// $Id$
|
| 3 |
+
|
| 4 |
+
/***********************************************************************
|
| 5 |
+
Moses - factored phrase-based language decoder
|
| 6 |
+
Copyright (c) 2006 University of Edinburgh
|
| 7 |
+
All rights reserved.
|
| 8 |
+
|
| 9 |
+
Redistribution and use in source and binary forms, with or without modification,
|
| 10 |
+
are permitted provided that the following conditions are met:
|
| 11 |
+
|
| 12 |
+
* Redistributions of source code must retain the above copyright notice,
|
| 13 |
+
this list of conditions and the following disclaimer.
|
| 14 |
+
* Redistributions in binary form must reproduce the above copyright notice,
|
| 15 |
+
this list of conditions and the following disclaimer in the documentation
|
| 16 |
+
and/or other materials provided with the distribution.
|
| 17 |
+
* Neither the name of the University of Edinburgh nor the names of its contributors
|
| 18 |
+
may be used to endorse or promote products derived from this software
|
| 19 |
+
without specific prior written permission.
|
| 20 |
+
|
| 21 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| 22 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
| 23 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 24 |
+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
| 25 |
+
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 26 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 27 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| 28 |
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
| 29 |
+
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| 30 |
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 31 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 32 |
+
***********************************************************************/
|
| 33 |
+
|
| 34 |
+
// example file on how to use moses library
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
#include "moses/StaticData.h"
|
| 38 |
+
|
| 39 |
+
class IOWrapper;
|
| 40 |
+
|
| 41 |
+
int main(int argc, char* argv[]);
|
| 42 |
+
|
mosesdecoder/moses2/AlignmentInfo.h
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#pragma once
|
| 21 |
+
|
| 22 |
+
#include <iostream>
|
| 23 |
+
#include <ostream>
|
| 24 |
+
#include <set>
|
| 25 |
+
#include <vector>
|
| 26 |
+
#include <cstdlib>
|
| 27 |
+
|
| 28 |
+
#include <boost/functional/hash.hpp>
|
| 29 |
+
#include "TypeDef.h"
|
| 30 |
+
|
| 31 |
+
namespace Moses2
|
| 32 |
+
{
|
| 33 |
+
|
| 34 |
+
class AlignmentInfoCollection;
|
| 35 |
+
class System;
|
| 36 |
+
|
| 37 |
+
/** Collection of non-terminal alignment pairs, ordered by source index.
|
| 38 |
+
* Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
|
| 39 |
+
*/
|
| 40 |
+
class AlignmentInfo
{
  // Internals are visible only to the ordering/hashing functors, the owning
  // collection (which interns instances), and the VW feature function.
  friend struct AlignmentInfoOrderer;
  friend struct AlignmentInfoHasher;
  friend class AlignmentInfoCollection;
  friend class VW;

  friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);

public:
  // Set of (sourceIndex, targetIndex) alignment pairs, ordered by source index.
  typedef std::set<std::pair<size_t,size_t> > CollType;
  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;

  const_iterator begin() const {
    return m_collection.begin();
  }
  const_iterator end() const {
    return m_collection.end();
  }

  // Insert one (source, target) alignment pair; duplicates are ignored by the set.
  void Add(size_t sourcePos, size_t targetPos) {
    m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
  }
  /** Provides a map from target-side to source-side non-terminal indices.
   * The target-side index should be the rule symbol index (COUNTING terminals).
   * The index returned is the rule non-terminal index (IGNORING terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap() const {
    return m_nonTermIndexMap;
  }

  /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
   * the index counting both terminals and non-terminals) */
  const NonTermIndexMap &GetNonTermIndexMap2() const {
    return m_nonTermIndexMap2;
  }

  const CollType &GetAlignments() const {
    return m_collection;
  }

  // Target indices aligned to a given source index, and vice versa.
  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;

  size_t GetSize() const {
    return m_collection.size();
  }

  // Pointers into m_collection, ordered per SortOrder; valid only while this
  // object is alive and unmodified.
  std::vector< const std::pair<size_t,size_t>* >
  GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;

  std::vector<size_t> GetSourceIndex2PosMap() const;

  // NOTE: equality deliberately ignores m_nonTermIndexMap2, mirroring the
  // ordering functor below.
  bool operator==(const AlignmentInfo& rhs) const {
    return m_collection == rhs.m_collection &&
           m_nonTermIndexMap == rhs.m_nonTermIndexMap;
  }

  std::string Debug(const System &system) const;

private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
  explicit AlignmentInfo(const std::vector<unsigned char> &aln);

  // used only by VW to load word alignment between sentences
  explicit AlignmentInfo(const std::string &str);

  void BuildNonTermIndexMaps();

  CollType m_collection;          // the alignment pairs themselves
  NonTermIndexMap m_nonTermIndexMap;   // target symbol index -> source non-term index
  NonTermIndexMap m_nonTermIndexMap2;  // target symbol index -> source symbol index
};
|
| 115 |
+
|
| 116 |
+
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
|
| 117 |
+
* for use by AlignmentInfoCollection.
|
| 118 |
+
*/
|
| 119 |
+
struct AlignmentInfoOrderer {
|
| 120 |
+
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
|
| 121 |
+
if (a.m_collection == b.m_collection) {
|
| 122 |
+
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
|
| 123 |
+
} else {
|
| 124 |
+
return a.m_collection < b.m_collection;
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
};
|
| 128 |
+
|
| 129 |
+
/**
|
| 130 |
+
* Hashing functoid
|
| 131 |
+
**/
|
| 132 |
+
struct AlignmentInfoHasher {
|
| 133 |
+
size_t operator()(const AlignmentInfo& a) const {
|
| 134 |
+
size_t seed = 0;
|
| 135 |
+
boost::hash_combine(seed,a.m_collection);
|
| 136 |
+
boost::hash_combine(seed,a.m_nonTermIndexMap);
|
| 137 |
+
return seed;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
};
|
| 141 |
+
|
| 142 |
+
inline size_t hash_value(const AlignmentInfo& a)
|
| 143 |
+
{
|
| 144 |
+
static AlignmentInfoHasher hasher;
|
| 145 |
+
return hasher(a);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
}
|
mosesdecoder/moses2/AlignmentInfoCollection.cpp
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/***********************************************************************
|
| 2 |
+
Moses - statistical machine translation system
|
| 3 |
+
Copyright (C) 2006-2011 University of Edinburgh
|
| 4 |
+
|
| 5 |
+
This library is free software; you can redistribute it and/or
|
| 6 |
+
modify it under the terms of the GNU Lesser General Public
|
| 7 |
+
License as published by the Free Software Foundation; either
|
| 8 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 9 |
+
|
| 10 |
+
This library is distributed in the hope that it will be useful,
|
| 11 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 12 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 13 |
+
Lesser General Public License for more details.
|
| 14 |
+
|
| 15 |
+
You should have received a copy of the GNU Lesser General Public
|
| 16 |
+
License along with this library; if not, write to the Free Software
|
| 17 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 18 |
+
***********************************************************************/
|
| 19 |
+
|
| 20 |
+
#include "AlignmentInfoCollection.h"
|
| 21 |
+
|
| 22 |
+
using namespace std;
|
| 23 |
+
|
| 24 |
+
namespace Moses2
|
| 25 |
+
{
|
| 26 |
+
|
| 27 |
+
AlignmentInfoCollection AlignmentInfoCollection::s_instance;
|
| 28 |
+
|
| 29 |
+
AlignmentInfoCollection::AlignmentInfoCollection()
{
  // Pre-intern the empty alignment so GetEmptyAlignmentInfo() can hand out
  // a stable shared pointer without locking.
  std::set<std::pair<size_t,size_t> > pairs;
  m_emptyAlignmentInfo = Add(pairs);
}
|
| 34 |
+
|
| 35 |
+
// Members clean themselves up; nothing extra to release.
AlignmentInfoCollection::~AlignmentInfoCollection()
{}
|
| 37 |
+
|
| 38 |
+
// Return the canonical empty alignment interned by the constructor.
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
  return *m_emptyAlignmentInfo;
}
|
| 42 |
+
|
| 43 |
+
/** Intern an AlignmentInfo: return a pointer to the canonical stored copy,
 * inserting it first if absent. The returned pointer is stable (set elements
 * are never moved).
 */
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
  {
    // Fast path: most alignments are already interned, so probe under a
    // shared (read) lock first.
    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
    AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
    if (i != m_collection.end())
      return &*i;
  }
  // Miss: take the exclusive lock. Another thread may insert the same value
  // between the two locks; the set insert below tolerates that race by
  // returning the already-stored element.
  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
  std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
  return &(*ret.first);
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
}
|
mosesdecoder/moses2/ArcLists.cpp
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* ArcList.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <iostream>
|
| 8 |
+
#include <sstream>
|
| 9 |
+
#include <algorithm>
|
| 10 |
+
#include <boost/foreach.hpp>
|
| 11 |
+
#include "ArcLists.h"
|
| 12 |
+
#include "HypothesisBase.h"
|
| 13 |
+
#include "util/exception.hh"
|
| 14 |
+
|
| 15 |
+
using namespace std;
|
| 16 |
+
|
| 17 |
+
namespace Moses2
|
| 18 |
+
{
|
| 19 |
+
|
| 20 |
+
ArcLists::ArcLists()
{
  // Nothing to initialise; arc lists are created lazily in AddArc().
}
|
| 25 |
+
|
| 26 |
+
// This object owns every ArcList it maps to; free them all on teardown.
ArcLists::~ArcLists()
{
  for (Coll::iterator it = m_coll.begin(); it != m_coll.end(); ++it) {
    delete it->second;
  }
}
|
| 33 |
+
|
| 34 |
+
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
|
| 35 |
+
const HypothesisBase *otherHypo)
|
| 36 |
+
{
|
| 37 |
+
//cerr << added << " " << currHypo << " " << otherHypo << endl;
|
| 38 |
+
ArcList *arcList;
|
| 39 |
+
if (added) {
|
| 40 |
+
// we're winners!
|
| 41 |
+
if (otherHypo) {
|
| 42 |
+
// there was a existing losing hypo
|
| 43 |
+
arcList = &GetAndDetachArcList(otherHypo);
|
| 44 |
+
} else {
|
| 45 |
+
// there was no existing hypo
|
| 46 |
+
arcList = new ArcList;
|
| 47 |
+
}
|
| 48 |
+
m_coll[currHypo] = arcList;
|
| 49 |
+
} else {
|
| 50 |
+
// we're losers!
|
| 51 |
+
// there should be a winner, we're not doing beam pruning
|
| 52 |
+
UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
|
| 53 |
+
arcList = &GetArcList(otherHypo);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
// in any case, add the curr hypo
|
| 57 |
+
arcList->push_back(currHypo);
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
// Look up the arc list owned by hypo; throws if none is registered.
ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
{
  Coll::iterator found = m_coll.find(hypo);
  UTIL_THROW_IF2(found == m_coll.end(), "Can't find arc list");
  return *found->second;
}
|
| 67 |
+
|
| 68 |
+
/** Const lookup of hypo's arc list; dumps a diagnostic listing of all known
 * hypos to stderr before throwing when the lookup fails.
 * Fixes: the diagnostic loop variable shadowed the 'hypo' parameter, and the
 * diagnostic line was never terminated before the throw.
 */
const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
{
  Coll::const_iterator iter = m_coll.find(hypo);

  if (iter == m_coll.end()) {
    cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
    BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
      const HypothesisBase *knownHypo = collPair.first; // was shadowing 'hypo'
      cerr << knownHypo << " ";
    }
    cerr << endl; // terminate the diagnostic line before throwing
  }

  UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
  ArcList &arcList = *iter->second;
  return arcList;
}
|
| 84 |
+
|
| 85 |
+
// Remove hypo's entry from the map and hand its arc list to the caller,
// who takes over ownership. Throws if hypo has no arc list.
ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
{
  Coll::iterator found = m_coll.find(hypo);
  UTIL_THROW_IF2(found == m_coll.end(), "Can't find arc list");

  ArcList &detached = *found->second;
  m_coll.erase(found);
  return detached;
}
|
| 95 |
+
|
| 96 |
+
void ArcLists::Sort()
|
| 97 |
+
{
|
| 98 |
+
BOOST_FOREACH(Coll::value_type &collPair, m_coll) {
|
| 99 |
+
ArcList &list = *collPair.second;
|
| 100 |
+
std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
|
| 101 |
+
}
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
void ArcLists::Delete(const HypothesisBase *hypo)
|
| 105 |
+
{
|
| 106 |
+
//cerr << "hypo=" << hypo->Debug() << endl;
|
| 107 |
+
//cerr << "m_coll=" << m_coll.size() << endl;
|
| 108 |
+
Coll::iterator iter = m_coll.find(hypo);
|
| 109 |
+
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
|
| 110 |
+
ArcList *arcList = iter->second;
|
| 111 |
+
|
| 112 |
+
m_coll.erase(iter);
|
| 113 |
+
delete arcList;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
std::string ArcLists::Debug(const System &system) const
|
| 117 |
+
{
|
| 118 |
+
stringstream strm;
|
| 119 |
+
BOOST_FOREACH(const Coll::value_type &collPair, m_coll) {
|
| 120 |
+
const ArcList *arcList = collPair.second;
|
| 121 |
+
strm << arcList << "(" << arcList->size() << ") ";
|
| 122 |
+
}
|
| 123 |
+
return strm.str();
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
}
|
| 127 |
+
|
mosesdecoder/moses2/Array.h
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <cassert>
|
| 3 |
+
#include <boost/functional/hash.hpp>
|
| 4 |
+
#include "MemPool.h"
|
| 5 |
+
|
| 6 |
+
namespace Moses2
|
| 7 |
+
{
|
| 8 |
+
|
| 9 |
+
template<typename T>
|
| 10 |
+
class Array
|
| 11 |
+
{
|
| 12 |
+
public:
|
| 13 |
+
typedef T* iterator;
|
| 14 |
+
typedef const T* const_iterator;
|
| 15 |
+
//! iterators
|
| 16 |
+
const_iterator begin() const {
|
| 17 |
+
return m_arr;
|
| 18 |
+
}
|
| 19 |
+
const_iterator end() const {
|
| 20 |
+
return m_arr + m_size;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
iterator begin() {
|
| 24 |
+
return m_arr;
|
| 25 |
+
}
|
| 26 |
+
iterator end() {
|
| 27 |
+
return m_arr + m_size;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
Array(MemPool &pool, size_t size = 0, const T &val = T()) {
|
| 31 |
+
m_size = size;
|
| 32 |
+
m_maxSize = size;
|
| 33 |
+
m_arr = pool.Allocate<T>(size);
|
| 34 |
+
for (size_t i = 0; i < size; ++i) {
|
| 35 |
+
m_arr[i] = val;
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
size_t size() const {
|
| 40 |
+
return m_size;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
const T& operator[](size_t ind) const {
|
| 44 |
+
assert(ind < m_size);
|
| 45 |
+
return m_arr[ind];
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
T& operator[](size_t ind) {
|
| 49 |
+
assert(ind < m_size);
|
| 50 |
+
return m_arr[ind];
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
T *GetArray() {
|
| 54 |
+
return m_arr;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
size_t hash() const {
|
| 58 |
+
size_t seed = 0;
|
| 59 |
+
for (size_t i = 0; i < m_size; ++i) {
|
| 60 |
+
boost::hash_combine(seed, m_arr[i]);
|
| 61 |
+
}
|
| 62 |
+
return seed;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
int Compare(const Array &compare) const {
|
| 66 |
+
|
| 67 |
+
int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
|
| 68 |
+
return cmp;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
bool operator==(const Array &compare) const {
|
| 72 |
+
int cmp = Compare(compare);
|
| 73 |
+
return cmp == 0;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
void resize(size_t newSize) {
|
| 77 |
+
assert(m_size <= m_maxSize);
|
| 78 |
+
m_size = newSize;
|
| 79 |
+
}
|
| 80 |
+
protected:
|
| 81 |
+
size_t m_size, m_maxSize;
|
| 82 |
+
T *m_arr;
|
| 83 |
+
};
|
| 84 |
+
|
| 85 |
+
}
|
mosesdecoder/moses2/EstimatedScores.h
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// $Id$
|
| 2 |
+
|
| 3 |
+
/***********************************************************************
|
| 4 |
+
Moses - factored phrase-based language decoder
|
| 5 |
+
Copyright (C) 2006 University of Edinburgh
|
| 6 |
+
|
| 7 |
+
This library is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU Lesser General Public
|
| 9 |
+
License as published by the Free Software Foundation; either
|
| 10 |
+
version 2.1 of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This library is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 15 |
+
Lesser General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU Lesser General Public
|
| 18 |
+
License along with this library; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
| 20 |
+
***********************************************************************/
|
| 21 |
+
|
| 22 |
+
#pragma once
|
| 23 |
+
|
| 24 |
+
#include <iostream>
|
| 25 |
+
#include "legacy/Util2.h"
|
| 26 |
+
#include "legacy/Bitmap.h"
|
| 27 |
+
#include "legacy/Matrix.h"
|
| 28 |
+
|
| 29 |
+
namespace Moses2
|
| 30 |
+
{
|
| 31 |
+
class MemPool;
|
| 32 |
+
class System;
|
| 33 |
+
|
| 34 |
+
//! A square array of floats to store future costs in the phrase-based decoder
|
| 35 |
+
//! A square array of floats to store future costs in the phrase-based decoder
class EstimatedScores: public Matrix<float>
{
public:
  // A size x size matrix allocated from the given pool; cell (startPos, endPos)
  // holds the estimated score for that source span.
  EstimatedScores(MemPool &pool, size_t size) :
    Matrix<float>(pool, size, size) {
  }

  // Declared but deliberately not implemented: instances are pool-allocated
  // and never individually destroyed.
  ~EstimatedScores(); // not implemented

  // Estimated score for the uncovered words of a bitmap, optionally with an
  // extra span [startPos, endPos] treated as covered.
  float CalcEstimatedScore(Bitmap const&) const;
  float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;

  // Dump the full matrix, one row per endPos.
  std::ostream &Debug(std::ostream &out, const System &system) const {
    for (size_t endPos = 0; endPos < GetSize(); endPos++) {
      for (size_t startPos = 0; startPos < GetSize(); startPos++)
        out << GetValue(startPos, endPos) << " ";
      out << std::endl;
    }
    return out;
  }

};
|
| 57 |
+
|
| 58 |
+
}
|
| 59 |
+
|
mosesdecoder/moses2/HypothesisBase.cpp
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Hypothesis.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 24 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include <boost/foreach.hpp>
|
| 9 |
+
#include <stdlib.h>
|
| 10 |
+
#include <deque>
|
| 11 |
+
#include "HypothesisBase.h"
|
| 12 |
+
#include "System.h"
|
| 13 |
+
#include "Scores.h"
|
| 14 |
+
#include "ManagerBase.h"
|
| 15 |
+
#include "MemPool.h"
|
| 16 |
+
#include "FF/StatefulFeatureFunction.h"
|
| 17 |
+
|
| 18 |
+
using namespace std;
|
| 19 |
+
|
| 20 |
+
namespace Moses2
|
| 21 |
+
{
|
| 22 |
+
|
| 23 |
+
//size_t g_numHypos = 0;
|
| 24 |
+
|
| 25 |
+
// Allocate the score vector and one blank FF-state slot per stateful feature
// function, all from the manager's memory pool (freed wholesale with the pool,
// never individually).
HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
{
  m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
      system.featureFunctions.GetNumScores());

  // FF states, indexed by each FF's stateful index (not its position in the
  // vector, hence the explicit GetStatefulInd lookup).
  const std::vector<const StatefulFeatureFunction*> &sfffs =
      system.featureFunctions.GetStatefulFeatureFunctions();
  size_t numStatefulFFs = sfffs.size();
  m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);

  BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs) {
    size_t statefulInd = sfff->GetStatefulInd();
    FFState *state = sfff->BlankState(pool, system);
    m_ffStates[statefulInd] = state;
  }
}
|
| 42 |
+
|
| 43 |
+
// Convenience overload: hash with a zero seed.
size_t HypothesisBase::hash() const
{
  return hash(0);
}
|
| 47 |
+
|
| 48 |
+
size_t HypothesisBase::hash(size_t seed) const
|
| 49 |
+
{
|
| 50 |
+
size_t numStatefulFFs =
|
| 51 |
+
GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
|
| 52 |
+
|
| 53 |
+
// states
|
| 54 |
+
for (size_t i = 0; i < numStatefulFFs; ++i) {
|
| 55 |
+
const FFState *state = m_ffStates[i];
|
| 56 |
+
size_t hash = state->hash();
|
| 57 |
+
boost::hash_combine(seed, hash);
|
| 58 |
+
}
|
| 59 |
+
return seed;
|
| 60 |
+
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
bool HypothesisBase::operator==(const HypothesisBase &other) const
|
| 64 |
+
{
|
| 65 |
+
size_t numStatefulFFs =
|
| 66 |
+
GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
|
| 67 |
+
|
| 68 |
+
// states
|
| 69 |
+
for (size_t i = 0; i < numStatefulFFs; ++i) {
|
| 70 |
+
const FFState &thisState = *m_ffStates[i];
|
| 71 |
+
const FFState &otherState = *other.m_ffStates[i];
|
| 72 |
+
if (thisState != otherState) {
|
| 73 |
+
return false;
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
return true;
|
| 77 |
+
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
}
|
| 81 |
+
|
mosesdecoder/moses2/HypothesisBase.h
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Hypothesis.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 24 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
#include <iostream>
|
| 10 |
+
#include <cstddef>
|
| 11 |
+
#include "FF/FFState.h"
|
| 12 |
+
#include "Scores.h"
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class ManagerBase;
|
| 18 |
+
class Scores;
|
| 19 |
+
|
| 20 |
+
// Base class for phrase-based and chart hypotheses: owns the score vector and
// the per-stateful-FF state array. Instances are allocated from a MemPool.
class HypothesisBase
{
public:
  virtual ~HypothesisBase() {
  }

  inline ManagerBase &GetManager() const {
    return *m_mgr;
  }

  // Downcast helper to the concrete hypothesis type.
  template<typename T>
  const T &Cast() const {
    return static_cast<const T&>(*this);
  }

  const Scores &GetScores() const {
    return *m_scores;
  }
  Scores &GetScores() {
    return *m_scores;
  }

  // State of the stateful feature function with stateful index 'ind'.
  const FFState *GetState(size_t ind) const {
    return m_ffStates[ind];
  }
  FFState *GetState(size_t ind) {
    return m_ffStates[ind];
  }

  // Recombination identity: hash/equality over all stateful FF states.
  virtual size_t hash() const;
  virtual size_t hash(size_t seed) const;
  virtual bool operator==(const HypothesisBase &other) const;

  virtual SCORE GetFutureScore() const = 0;
  virtual void EvaluateWhenApplied() = 0;

  virtual std::string Debug(const System &system) const = 0;

protected:
  ManagerBase *m_mgr;    // owning manager (not owned by this hypothesis)
  Scores *m_scores;      // pool-allocated score vector
  FFState **m_ffStates;  // pool-allocated array, one slot per stateful FF

  HypothesisBase(MemPool &pool, const System &system);
};
|
| 65 |
+
|
| 66 |
+
////////////////////////////////////////////////////////////////////////////////////
|
| 67 |
+
class HypothesisFutureScoreOrderer
|
| 68 |
+
{
|
| 69 |
+
public:
|
| 70 |
+
bool operator()(const HypothesisBase* a, const HypothesisBase* b) const {
|
| 71 |
+
return a->GetFutureScore() > b->GetFutureScore();
|
| 72 |
+
}
|
| 73 |
+
};
|
| 74 |
+
|
| 75 |
+
}
|
| 76 |
+
|
mosesdecoder/moses2/HypothesisColl.cpp
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HypothesisColl.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Feb 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <iostream>
|
| 8 |
+
#include <sstream>
|
| 9 |
+
#include <algorithm>
|
| 10 |
+
#include <boost/foreach.hpp>
|
| 11 |
+
#include "HypothesisColl.h"
|
| 12 |
+
#include "ManagerBase.h"
|
| 13 |
+
#include "System.h"
|
| 14 |
+
#include "MemPoolAllocator.h"
|
| 15 |
+
|
| 16 |
+
using namespace std;
|
| 17 |
+
|
| 18 |
+
namespace Moses2
|
| 19 |
+
{
|
| 20 |
+
|
| 21 |
+
HypothesisColl::HypothesisColl(const ManagerBase &mgr)
  :m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool()))
  ,m_sortedHypos(NULL)  // built lazily by GetSortedAndPrunedHypos()
{
  // Sentinels: any real score beats m_bestScore, and nothing is worse than
  // m_worstScore, until Add() tightens them.
  m_bestScore = -std::numeric_limits<float>::infinity();
  m_worstScore = std::numeric_limits<float>::infinity();
}
|
| 28 |
+
|
| 29 |
+
/** Return the highest-scoring hypothesis, or NULL when the collection is
 * empty. Uses the sorted cache if it has already been built.
 * Fix: bestHypo was uninitialised; if every score were -inf (or NaN) the
 * scan could return an indeterminate pointer. It now starts as NULL.
 */
const HypothesisBase *HypothesisColl::GetBestHypo() const
{
  if (GetSize() == 0) {
    return NULL;
  }
  if (m_sortedHypos) {
    // Already sorted best-first.
    return (*m_sortedHypos)[0];
  }

  // Linear scan over the unsorted collection.
  SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
  const HypothesisBase *bestHypo = NULL;
  BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
    if (hypo->GetFutureScore() > bestScore) {
      bestScore = hypo->GetFutureScore();
      bestHypo = hypo;
    }
  }
  return bestHypo;
}
|
| 48 |
+
|
| 49 |
+
/** Insert a hypothesis into the stack, applying early discard, recombination,
 * recycling of losers, and beam-statistic updates. Ownership of rejected
 * hypos passes to the recycler.
 */
void HypothesisColl::Add(
  const ManagerBase &mgr,
  HypothesisBase *hypo,
  Recycler<HypothesisBase*> &hypoRecycle,
  ArcLists &arcLists)
{
  size_t maxStackSize = mgr.system.options.search.stack_size;

  // Keep the collection bounded: once it exceeds twice the stack limit,
  // prune back down before inserting more.
  if (GetSize() > maxStackSize * 2) {
    PruneHypos(mgr, mgr.arcLists);
  }

  SCORE futureScore = hypo->GetFutureScore();

  if (GetSize() >= maxStackSize && futureScore < m_worstScore) {
    // beam threshold or really bad hypo that won't make the pruning cut
    // as more hypos are added, the m_worstScore stat gets out of date and isn't the optimum cut-off point
    hypoRecycle.Recycle(hypo);
    return;
  }

  StackAdd added = Add(hypo);

  // When n-best output is requested, arcs to recombined losers must be kept;
  // otherwise losers are recycled immediately.
  size_t nbestSize = mgr.system.options.nbest.nbest_size;
  if (nbestSize) {
    arcLists.AddArc(added.added, hypo, added.other);
  } else {
    if (added.added) {
      if (added.other) {
        hypoRecycle.Recycle(added.other);  // displaced equivalent hypo
      }
    } else {
      hypoRecycle.Recycle(hypo);  // incoming hypo lost recombination
    }
  }

  // update beam variables
  if (added.added) {
    if (futureScore > m_bestScore) {
      m_bestScore = futureScore;
      // NOTE(review): presumably beam_width is a (negative) log-score margin,
      // so best + width tightens the cut-off — confirm against the options docs.
      float beamWidth = mgr.system.options.search.beam_width;
      if ( m_bestScore + beamWidth > m_worstScore ) {
        m_worstScore = m_bestScore + beamWidth;
      }
    } else if (GetSize() <= maxStackSize && futureScore < m_worstScore) {
      m_worstScore = futureScore;
    }
  }
}
|
| 107 |
+
|
| 108 |
+
/** Try to insert a hypothesis; detect recombination with an equivalent
 * stored hypo (same hash/equality, i.e. same FF states).
 * Returns StackAdd(added?, displacedOrExistingHypo).
 */
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
  std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);

  // CHECK RECOMBINATION
  if (addRet.second) {
    // No equivalent hypo exists; plain insert.
    return StackAdd(true, NULL);
  } else {
    HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);

    if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
      // Incoming hypo beats the stored equivalent: overwrite the pointer
      // stored in the set, in place. This is only safe because the two hypos
      // are equivalent under the container's hash/equality, so the bucket
      // placement is unchanged.
      const HypothesisBase * const &hypoExisting1 = *addRet.first;
      const HypothesisBase *&hypoExisting2 =
          const_cast<const HypothesisBase *&>(hypoExisting1);
      hypoExisting2 = hypo;

      // The displaced (worse) hypo is returned for recycling / arc lists.
      return StackAdd(true, hypoExisting);
    } else {
      // The stored hypo is at least as good; the incoming one is discarded.
      return StackAdd(false, hypoExisting);
    }
  }
}
|
| 145 |
+
|
| 146 |
+
/** Lazily build (and cache) the best-first sorted vector of hypotheses,
 * pruned to the stack-size limit. Pruned hypos are recycled and their arc
 * lists deleted when n-best output is on.
 */
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos(
  const ManagerBase &mgr,
  ArcLists &arcLists) const
{
  if (m_sortedHypos == NULL) {
    // create sortedHypos first (placement new in the manager's pool)
    MemPool &pool = mgr.GetPool();
    m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
        m_coll.size());

    SortHypos(mgr, m_sortedHypos->GetArray());

    // prune everything past the stack limit
    Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();

    size_t maxStackSize = mgr.system.options.search.stack_size;
    if (maxStackSize && m_sortedHypos->size() > maxStackSize) {
      for (size_t i = maxStackSize; i < m_sortedHypos->size(); ++i) {
        HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
        recycler.Recycle(hypo);

        // delete from arclist (only maintained when n-best output is on)
        if (mgr.system.options.nbest.nbest_size) {
          arcLists.Delete(hypo);
        }
      }
      m_sortedHypos->resize(maxStackSize);
    }

  }

  return *m_sortedHypos;
}
|
| 179 |
+
|
| 180 |
+
void HypothesisColl::PruneHypos(const ManagerBase &mgr, ArcLists &arcLists)
|
| 181 |
+
{
|
| 182 |
+
size_t maxStackSize = mgr.system.options.search.stack_size;
|
| 183 |
+
|
| 184 |
+
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycler();
|
| 185 |
+
|
| 186 |
+
const HypothesisBase **sortedHypos = (const HypothesisBase **) alloca(GetSize() * sizeof(const HypothesisBase *));
|
| 187 |
+
SortHypos(mgr, sortedHypos);
|
| 188 |
+
|
| 189 |
+
// update worse score
|
| 190 |
+
m_worstScore = sortedHypos[maxStackSize - 1]->GetFutureScore();
|
| 191 |
+
|
| 192 |
+
// prune
|
| 193 |
+
for (size_t i = maxStackSize; i < GetSize(); ++i) {
|
| 194 |
+
HypothesisBase *hypo = const_cast<HypothesisBase*>(sortedHypos[i]);
|
| 195 |
+
|
| 196 |
+
// delete from arclist
|
| 197 |
+
if (mgr.system.options.nbest.nbest_size) {
|
| 198 |
+
arcLists.Delete(hypo);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
// delete from collection
|
| 202 |
+
Delete(hypo);
|
| 203 |
+
|
| 204 |
+
recycler.Recycle(hypo);
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
void HypothesisColl::SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const
|
| 210 |
+
{
|
| 211 |
+
size_t maxStackSize = mgr.system.options.search.stack_size;
|
| 212 |
+
//assert(maxStackSize); // can't do stack=0 - unlimited stack size. No-one ever uses that
|
| 213 |
+
//assert(GetSize() > maxStackSize);
|
| 214 |
+
//assert(sortedHypos.size() == GetSize());
|
| 215 |
+
|
| 216 |
+
/*
|
| 217 |
+
cerr << "UNSORTED hypos: ";
|
| 218 |
+
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
|
| 219 |
+
cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
|
| 220 |
+
}
|
| 221 |
+
cerr << endl;
|
| 222 |
+
*/
|
| 223 |
+
size_t ind = 0;
|
| 224 |
+
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
|
| 225 |
+
sortedHypos[ind] = hypo;
|
| 226 |
+
++ind;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
size_t indMiddle;
|
| 230 |
+
if (maxStackSize == 0) {
|
| 231 |
+
indMiddle = GetSize();
|
| 232 |
+
} else if (GetSize() > maxStackSize) {
|
| 233 |
+
indMiddle = maxStackSize;
|
| 234 |
+
} else {
|
| 235 |
+
// GetSize() <= maxStackSize
|
| 236 |
+
indMiddle = GetSize();
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
const HypothesisBase **iterMiddle = sortedHypos + indMiddle;
|
| 240 |
+
|
| 241 |
+
std::partial_sort(
|
| 242 |
+
sortedHypos,
|
| 243 |
+
iterMiddle,
|
| 244 |
+
sortedHypos + GetSize(),
|
| 245 |
+
HypothesisFutureScoreOrderer());
|
| 246 |
+
|
| 247 |
+
/*
|
| 248 |
+
cerr << "sorted hypos: ";
|
| 249 |
+
for (size_t i = 0; i < sortedHypos.size(); ++i) {
|
| 250 |
+
const HypothesisBase *hypo = sortedHypos[i];
|
| 251 |
+
cerr << hypo << " ";
|
| 252 |
+
}
|
| 253 |
+
cerr << endl;
|
| 254 |
+
*/
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
void HypothesisColl::Delete(const HypothesisBase *hypo)
|
| 258 |
+
{
|
| 259 |
+
//cerr << " Delete hypo=" << hypo << "(" << hypo->hash() << ")"
|
| 260 |
+
// << " m_coll=" << m_coll.size() << endl;
|
| 261 |
+
|
| 262 |
+
size_t erased = m_coll.erase(hypo);
|
| 263 |
+
UTIL_THROW_IF2(erased != 1, "couldn't erase hypo " << hypo);
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
void HypothesisColl::Clear()
|
| 267 |
+
{
|
| 268 |
+
m_sortedHypos = NULL;
|
| 269 |
+
m_coll.clear();
|
| 270 |
+
|
| 271 |
+
m_bestScore = -std::numeric_limits<float>::infinity();
|
| 272 |
+
m_worstScore = std::numeric_limits<float>::infinity();
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
std::string HypothesisColl::Debug(const System &system) const
|
| 276 |
+
{
|
| 277 |
+
stringstream out;
|
| 278 |
+
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
|
| 279 |
+
out << hypo->Debug(system);
|
| 280 |
+
out << std::endl << std::endl;
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
return out.str();
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/HypothesisColl.h
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* HypothesisColl.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Feb 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include <unordered_set>
|
| 9 |
+
#include "HypothesisBase.h"
|
| 10 |
+
#include "MemPoolAllocator.h"
|
| 11 |
+
#include "Recycler.h"
|
| 12 |
+
#include "Array.h"
|
| 13 |
+
#include "legacy/Util2.h"
|
| 14 |
+
|
| 15 |
+
namespace Moses2
|
| 16 |
+
{
|
| 17 |
+
|
| 18 |
+
class ManagerBase;
|
| 19 |
+
class ArcLists;
|
| 20 |
+
|
| 21 |
+
typedef Array<const HypothesisBase*> Hypotheses;
|
| 22 |
+
|
| 23 |
+
////////////////////////////////////////////////////
|
| 24 |
+
class HypothesisColl
|
| 25 |
+
{
|
| 26 |
+
public:
|
| 27 |
+
HypothesisColl(const ManagerBase &mgr);
|
| 28 |
+
|
| 29 |
+
void Add(const ManagerBase &mgr,
|
| 30 |
+
HypothesisBase *hypo,
|
| 31 |
+
Recycler<HypothesisBase*> &hypoRecycle,
|
| 32 |
+
ArcLists &arcLists);
|
| 33 |
+
|
| 34 |
+
size_t GetSize() const {
|
| 35 |
+
return m_coll.size();
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
void Clear();
|
| 39 |
+
|
| 40 |
+
const Hypotheses &GetSortedAndPrunedHypos(
|
| 41 |
+
const ManagerBase &mgr,
|
| 42 |
+
ArcLists &arcLists) const;
|
| 43 |
+
|
| 44 |
+
const HypothesisBase *GetBestHypo() const;
|
| 45 |
+
|
| 46 |
+
template<typename T>
|
| 47 |
+
const T *GetBestHypo() const {
|
| 48 |
+
const HypothesisBase *hypo = GetBestHypo();
|
| 49 |
+
return hypo ? &hypo->Cast<T>() : NULL;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
void Delete(const HypothesisBase *hypo);
|
| 53 |
+
|
| 54 |
+
std::string Debug(const System &system) const;
|
| 55 |
+
|
| 56 |
+
protected:
|
| 57 |
+
typedef std::unordered_set<const HypothesisBase*,
|
| 58 |
+
UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
|
| 59 |
+
MemPoolAllocator<const HypothesisBase*> > _HCType;
|
| 60 |
+
|
| 61 |
+
_HCType m_coll;
|
| 62 |
+
mutable Hypotheses *m_sortedHypos;
|
| 63 |
+
|
| 64 |
+
SCORE m_bestScore;
|
| 65 |
+
SCORE m_worstScore;
|
| 66 |
+
|
| 67 |
+
StackAdd Add(const HypothesisBase *hypo);
|
| 68 |
+
|
| 69 |
+
void PruneHypos(const ManagerBase &mgr, ArcLists &arcLists);
|
| 70 |
+
void SortHypos(const ManagerBase &mgr, const HypothesisBase **sortedHypos) const;
|
| 71 |
+
|
| 72 |
+
};
|
| 73 |
+
|
| 74 |
+
} /* namespace Moses2 */
|
| 75 |
+
|
mosesdecoder/moses2/InputPathsBase.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* InputPaths.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <vector>
|
| 11 |
+
#include "MemPool.h"
|
| 12 |
+
|
| 13 |
+
namespace Moses2
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
class InputType;
|
| 17 |
+
class System;
|
| 18 |
+
class ManagerBase;
|
| 19 |
+
class InputPathBase;
|
| 20 |
+
|
| 21 |
+
class InputPathsBase
|
| 22 |
+
{
|
| 23 |
+
typedef std::vector<InputPathBase*> Coll;
|
| 24 |
+
public:
|
| 25 |
+
InputPathsBase() {
|
| 26 |
+
}
|
| 27 |
+
virtual ~InputPathsBase();
|
| 28 |
+
|
| 29 |
+
//! iterators
|
| 30 |
+
typedef Coll::iterator iterator;
|
| 31 |
+
typedef Coll::const_iterator const_iterator;
|
| 32 |
+
|
| 33 |
+
const_iterator begin() const {
|
| 34 |
+
return m_inputPaths.begin();
|
| 35 |
+
}
|
| 36 |
+
const_iterator end() const {
|
| 37 |
+
return m_inputPaths.end();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
iterator begin() {
|
| 41 |
+
return m_inputPaths.begin();
|
| 42 |
+
}
|
| 43 |
+
iterator end() {
|
| 44 |
+
return m_inputPaths.end();
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;
|
| 48 |
+
|
| 49 |
+
protected:
|
| 50 |
+
Coll m_inputPaths;
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
|
mosesdecoder/moses2/Main.cpp
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <iostream>
|
| 2 |
+
#include <memory>
|
| 3 |
+
#include <boost/pool/pool_alloc.hpp>
|
| 4 |
+
#include "Main.h"
|
| 5 |
+
#include "System.h"
|
| 6 |
+
#include "Phrase.h"
|
| 7 |
+
#include "TranslationTask.h"
|
| 8 |
+
#include "MemPoolAllocator.h"
|
| 9 |
+
#ifdef HAVE_XMLRPC_C
|
| 10 |
+
#include "server/Server.h"
|
| 11 |
+
#endif // HAVE_XMLRPC_C
|
| 12 |
+
|
| 13 |
+
#include "legacy/InputFileStream.h"
|
| 14 |
+
#include "legacy/Parameter.h"
|
| 15 |
+
#include "legacy/ThreadPool.h"
|
| 16 |
+
#include "legacy/Timer.h"
|
| 17 |
+
#include "legacy/Util2.h"
|
| 18 |
+
#include "util/usage.hh"
|
| 19 |
+
|
| 20 |
+
//#include <vld.h>
|
| 21 |
+
|
| 22 |
+
using namespace std;
|
| 23 |
+
|
| 24 |
+
//extern size_t g_numHypos;
|
| 25 |
+
|
| 26 |
+
int main(int argc, char** argv)
|
| 27 |
+
{
|
| 28 |
+
cerr << "Starting..." << endl;
|
| 29 |
+
|
| 30 |
+
Moses2::Timer timer;
|
| 31 |
+
timer.start();
|
| 32 |
+
//Temp();
|
| 33 |
+
|
| 34 |
+
Moses2::Parameter params;
|
| 35 |
+
if (!params.LoadParam(argc, argv)) {
|
| 36 |
+
return EXIT_FAILURE;
|
| 37 |
+
}
|
| 38 |
+
Moses2::System system(params);
|
| 39 |
+
timer.check("Loaded");
|
| 40 |
+
|
| 41 |
+
if (params.GetParam("show-weights")) {
|
| 42 |
+
return EXIT_SUCCESS;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
//cerr << "system.numThreads=" << system.options.server.numThreads << endl;
|
| 46 |
+
Moses2::ThreadPool pool(system.options.server.numThreads, system.cpuAffinityOffset, system.cpuAffinityOffsetIncr);
|
| 47 |
+
//cerr << "CREATED POOL" << endl;
|
| 48 |
+
|
| 49 |
+
if (params.GetParam("server")) {
|
| 50 |
+
std::cerr << "RUN SERVER" << std::endl;
|
| 51 |
+
run_as_server(system);
|
| 52 |
+
}
|
| 53 |
+
else {
|
| 54 |
+
std::cerr << "RUN BATCH" << std::endl;
|
| 55 |
+
batch_run(params, system, pool);
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
cerr << "Decoding took " << timer.get_elapsed_time() << endl;
|
| 59 |
+
// cerr << "g_numHypos=" << g_numHypos << endl;
|
| 60 |
+
cerr << "Finished" << endl;
|
| 61 |
+
return EXIT_SUCCESS;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
////////////////////////////////////////////////////////////////////////////////////////////////
|
| 65 |
+
void run_as_server(Moses2::System& system)
|
| 66 |
+
{
|
| 67 |
+
#ifdef HAVE_XMLRPC_C
|
| 68 |
+
Moses2::Server server(system.options.server, system);
|
| 69 |
+
server.run(system); // actually: don't return. see Server::run()
|
| 70 |
+
#else
|
| 71 |
+
UTIL_THROW2("Moses2 was compiled without xmlrpc-c. "
|
| 72 |
+
<< "No server functionality available.");
|
| 73 |
+
#endif
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
////////////////////////////////////////////////////////////////////////////////////////////////
|
| 77 |
+
istream &GetInputStream(Moses2::Parameter ¶ms)
|
| 78 |
+
{
|
| 79 |
+
const Moses2::PARAM_VEC *vec = params.GetParam("input-file");
|
| 80 |
+
if (vec && vec->size()) {
|
| 81 |
+
Moses2::InputFileStream *stream = new Moses2::InputFileStream(vec->at(0));
|
| 82 |
+
return *stream;
|
| 83 |
+
} else {
|
| 84 |
+
return cin;
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
////////////////////////////////////////////////////////////////////////////////////////////////
|
| 89 |
+
|
| 90 |
+
void batch_run(Moses2::Parameter& params, Moses2::System& system, Moses2::ThreadPool& pool)
|
| 91 |
+
{
|
| 92 |
+
istream& inStream = GetInputStream(params);
|
| 93 |
+
|
| 94 |
+
long translationId = 0;
|
| 95 |
+
string line;
|
| 96 |
+
while (getline(inStream, line)) {
|
| 97 |
+
//cerr << "line=" << line << endl;
|
| 98 |
+
boost::shared_ptr<Moses2::TranslationTask> task(new Moses2::TranslationTask(system, line, translationId));
|
| 99 |
+
|
| 100 |
+
//cerr << "START pool.Submit()" << endl;
|
| 101 |
+
pool.Submit(task);
|
| 102 |
+
//task->Run();
|
| 103 |
+
++translationId;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
pool.Stop(true);
|
| 107 |
+
|
| 108 |
+
if (&inStream != &cin) {
|
| 109 |
+
delete& inStream;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
//util::PrintUsage(std::cerr);
|
| 113 |
+
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
////////////////////////////////////////////////////////////////////////////////////////////////
|
mosesdecoder/moses2/Main.h
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Main.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 1 Apr 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
#include <iostream>
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
class Parameter;
|
| 13 |
+
class System;
|
| 14 |
+
class ThreadPool;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
std::istream &GetInputStream(Moses2::Parameter ¶ms);
|
| 18 |
+
void batch_run(Moses2::Parameter ¶ms, Moses2::System &system, Moses2::ThreadPool &pool);
|
| 19 |
+
void run_as_server(Moses2::System &system);
|
| 20 |
+
|
| 21 |
+
void Temp();
|
| 22 |
+
|
| 23 |
+
|
mosesdecoder/moses2/ManagerBase.cpp
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Manager.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#include <boost/foreach.hpp>
|
| 8 |
+
#include <vector>
|
| 9 |
+
#include <sstream>
|
| 10 |
+
#include "System.h"
|
| 11 |
+
#include "ManagerBase.h"
|
| 12 |
+
#include "Phrase.h"
|
| 13 |
+
#include "InputPathsBase.h"
|
| 14 |
+
#include "InputPathBase.h"
|
| 15 |
+
#include "TranslationModel/PhraseTable.h"
|
| 16 |
+
#include "legacy/Range.h"
|
| 17 |
+
#include "PhraseBased/Sentence.h"
|
| 18 |
+
|
| 19 |
+
using namespace std;
|
| 20 |
+
|
| 21 |
+
namespace Moses2
|
| 22 |
+
{
|
| 23 |
+
ManagerBase::ManagerBase(System &sys, const TranslationTask &task,
|
| 24 |
+
const std::string &inputStr, long translationId)
|
| 25 |
+
:system(sys)
|
| 26 |
+
,task(task)
|
| 27 |
+
,m_inputStr(inputStr)
|
| 28 |
+
,m_translationId(translationId)
|
| 29 |
+
,m_pool(NULL)
|
| 30 |
+
,m_systemPool(NULL)
|
| 31 |
+
,m_hypoRecycler(NULL)
|
| 32 |
+
,m_input(NULL)
|
| 33 |
+
{
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
ManagerBase::~ManagerBase()
|
| 37 |
+
{
|
| 38 |
+
system.featureFunctions.CleanUpAfterSentenceProcessing(*m_input);
|
| 39 |
+
|
| 40 |
+
GetPool().Reset();
|
| 41 |
+
GetHypoRecycler().Clear();
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
void ManagerBase::InitPools()
|
| 45 |
+
{
|
| 46 |
+
m_pool = &system.GetManagerPool();
|
| 47 |
+
m_systemPool = &system.GetSystemPool();
|
| 48 |
+
m_hypoRecycler = &system.GetHypoRecycler();
|
| 49 |
+
//cerr << "pool size " << m_pool->Size() << " " << m_systemPool->Size() << endl;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
}
|
| 53 |
+
|
mosesdecoder/moses2/ManagerBase.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Manager.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <queue>
|
| 11 |
+
#include <cstddef>
|
| 12 |
+
#include <string>
|
| 13 |
+
#include <deque>
|
| 14 |
+
#include "Phrase.h"
|
| 15 |
+
#include "MemPool.h"
|
| 16 |
+
#include "Recycler.h"
|
| 17 |
+
#include "EstimatedScores.h"
|
| 18 |
+
#include "ArcLists.h"
|
| 19 |
+
#include "legacy/Bitmaps.h"
|
| 20 |
+
|
| 21 |
+
namespace Moses2
|
| 22 |
+
{
|
| 23 |
+
|
| 24 |
+
class System;
|
| 25 |
+
class TranslationTask;
|
| 26 |
+
class PhraseImpl;
|
| 27 |
+
class SearchNormal;
|
| 28 |
+
class Search;
|
| 29 |
+
class InputType;
|
| 30 |
+
class OutputCollector;
|
| 31 |
+
class HypothesisBase;
|
| 32 |
+
|
| 33 |
+
class ManagerBase
|
| 34 |
+
{
|
| 35 |
+
public:
|
| 36 |
+
System &system;
|
| 37 |
+
const TranslationTask &task;
|
| 38 |
+
mutable ArcLists arcLists;
|
| 39 |
+
|
| 40 |
+
ManagerBase(System &sys, const TranslationTask &task,
|
| 41 |
+
const std::string &inputStr, long translationId);
|
| 42 |
+
virtual ~ManagerBase();
|
| 43 |
+
virtual void Decode() = 0;
|
| 44 |
+
virtual std::string OutputBest() const = 0;
|
| 45 |
+
virtual std::string OutputNBest() = 0;
|
| 46 |
+
virtual std::string OutputTransOpt() = 0;
|
| 47 |
+
|
| 48 |
+
MemPool &GetPool() const {
|
| 49 |
+
return *m_pool;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
MemPool &GetSystemPool() const {
|
| 53 |
+
return *m_systemPool;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
Recycler<HypothesisBase*> &GetHypoRecycler() const {
|
| 57 |
+
return *m_hypoRecycler;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
const InputType &GetInput() const {
|
| 61 |
+
return *m_input;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
long GetTranslationId() const {
|
| 65 |
+
return m_translationId;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
protected:
|
| 69 |
+
std::string m_inputStr;
|
| 70 |
+
long m_translationId;
|
| 71 |
+
InputType *m_input;
|
| 72 |
+
|
| 73 |
+
mutable MemPool *m_pool, *m_systemPool;
|
| 74 |
+
mutable Recycler<HypothesisBase*> *m_hypoRecycler;
|
| 75 |
+
|
| 76 |
+
void InitPools();
|
| 77 |
+
|
| 78 |
+
};
|
| 79 |
+
|
| 80 |
+
}
|
| 81 |
+
|
mosesdecoder/moses2/MemPool.h
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* MemPool.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 28 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <algorithm>
|
| 11 |
+
#include <iostream>
|
| 12 |
+
#include <vector>
|
| 13 |
+
#include <stdint.h>
|
| 14 |
+
#include <stdlib.h>
|
| 15 |
+
#include <limits>
|
| 16 |
+
#include <iostream>
|
| 17 |
+
|
| 18 |
+
namespace Moses2
|
| 19 |
+
{
|
| 20 |
+
|
| 21 |
+
class MemPool
|
| 22 |
+
{
|
| 23 |
+
struct Page {
|
| 24 |
+
uint8_t *mem;
|
| 25 |
+
uint8_t *end;
|
| 26 |
+
size_t size;
|
| 27 |
+
|
| 28 |
+
Page() = delete;
|
| 29 |
+
Page(std::size_t size);
|
| 30 |
+
~Page();
|
| 31 |
+
};
|
| 32 |
+
|
| 33 |
+
public:
|
| 34 |
+
MemPool(std::size_t initSize = 10240);
|
| 35 |
+
|
| 36 |
+
~MemPool();
|
| 37 |
+
|
| 38 |
+
uint8_t* Allocate(std::size_t size);
|
| 39 |
+
|
| 40 |
+
template<typename T>
|
| 41 |
+
T *Allocate() {
|
| 42 |
+
uint8_t *ret = Allocate(sizeof(T));
|
| 43 |
+
return (T*) ret;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
template<typename T>
|
| 47 |
+
T *Allocate(size_t num) {
|
| 48 |
+
size_t size = sizeof(T);
|
| 49 |
+
size_t m = size % 16;
|
| 50 |
+
size += m;
|
| 51 |
+
|
| 52 |
+
uint8_t *ret = Allocate(size * num);
|
| 53 |
+
return (T*) ret;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
// re-use pool
|
| 57 |
+
void Reset();
|
| 58 |
+
|
| 59 |
+
size_t Size();
|
| 60 |
+
|
| 61 |
+
private:
|
| 62 |
+
uint8_t *More(std::size_t size);
|
| 63 |
+
|
| 64 |
+
std::vector<Page*> m_pages;
|
| 65 |
+
|
| 66 |
+
size_t m_currSize;
|
| 67 |
+
size_t m_currPage;
|
| 68 |
+
uint8_t *current_;
|
| 69 |
+
|
| 70 |
+
// no copying
|
| 71 |
+
MemPool(const MemPool &) = delete;
|
| 72 |
+
MemPool &operator=(const MemPool &) = delete;
|
| 73 |
+
};
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
}
|
| 77 |
+
|
mosesdecoder/moses2/MemPoolAllocator.h
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include "MemPool.h"
|
| 3 |
+
|
| 4 |
+
namespace Moses2
|
| 5 |
+
{
|
| 6 |
+
|
| 7 |
+
template<typename T>
|
| 8 |
+
class MemPoolAllocator
|
| 9 |
+
{
|
| 10 |
+
public:
|
| 11 |
+
typedef T value_type;
|
| 12 |
+
typedef T* pointer;
|
| 13 |
+
typedef const T* const_pointer;
|
| 14 |
+
typedef T& reference;
|
| 15 |
+
typedef const T& const_reference;
|
| 16 |
+
typedef std::size_t size_type;
|
| 17 |
+
typedef std::ptrdiff_t difference_type;
|
| 18 |
+
|
| 19 |
+
template<class U>
|
| 20 |
+
struct rebind {
|
| 21 |
+
typedef MemPoolAllocator<U> other;
|
| 22 |
+
};
|
| 23 |
+
|
| 24 |
+
MemPoolAllocator(Moses2::MemPool &pool) :
|
| 25 |
+
m_pool(pool) {
|
| 26 |
+
}
|
| 27 |
+
MemPoolAllocator(const MemPoolAllocator &other) :
|
| 28 |
+
m_pool(other.m_pool) {
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
template<class U>
|
| 32 |
+
MemPoolAllocator(const MemPoolAllocator<U>& other) :
|
| 33 |
+
m_pool(other.m_pool) {
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
size_type max_size() const {
|
| 37 |
+
return std::numeric_limits<size_type>::max();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
void deallocate(pointer p, size_type n) {
|
| 41 |
+
//std::cerr << "deallocate " << p << " " << n << std::endl;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
|
| 45 |
+
//std::cerr << "allocate " << n << " " << hint << std::endl;
|
| 46 |
+
pointer ret = m_pool.Allocate<T>(n);
|
| 47 |
+
return ret;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
void construct(pointer p, const_reference val) {
|
| 51 |
+
//std::cerr << "construct " << p << " " << n << std::endl;
|
| 52 |
+
new ((void *) p) T(val);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
void destroy(pointer p) {
|
| 56 |
+
//std::cerr << "destroy " << p << " " << n << std::endl;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
// return address of values
|
| 60 |
+
pointer address (reference value) const {
|
| 61 |
+
return &value;
|
| 62 |
+
}
|
| 63 |
+
const_pointer address (const_reference value) const {
|
| 64 |
+
return &value;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
bool operator==(const MemPoolAllocator<T> &allocator) const {
|
| 68 |
+
return true;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
bool operator!=(const MemPoolAllocator<T> &allocator) const {
|
| 72 |
+
return false;
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
MemPoolAllocator<T>& operator=(const MemPoolAllocator<T>& allocator) {
|
| 76 |
+
return *this;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
MemPool &m_pool;
|
| 80 |
+
protected:
|
| 81 |
+
};
|
| 82 |
+
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
|
mosesdecoder/moses2/Moses2Wrapper.h
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <string>
|
| 3 |
+
|
| 4 |
+
namespace Moses2 {
|
| 5 |
+
class Parameter;
|
| 6 |
+
class System;
|
| 7 |
+
extern "C" {
|
| 8 |
+
enum MosesApiErrorCode {
|
| 9 |
+
MS_API_OK,
|
| 10 |
+
MS_API_E_FAILURE,
|
| 11 |
+
MS_API_E_INPUT,
|
| 12 |
+
MS_API_E_TIMEOUT
|
| 13 |
+
};
|
| 14 |
+
}
|
| 15 |
+
class Moses2Wrapper
|
| 16 |
+
{
|
| 17 |
+
Parameter* m_param;
|
| 18 |
+
System* m_system;
|
| 19 |
+
|
| 20 |
+
public:
|
| 21 |
+
Moses2Wrapper(const std::string& filePath);
|
| 22 |
+
~Moses2Wrapper();
|
| 23 |
+
std::string Translate(const std::string& input, long id, bool nbest);
|
| 24 |
+
void UpdateLMPath(const std::string& filePath);
|
| 25 |
+
|
| 26 |
+
static char* CopyString(const char* str);
|
| 27 |
+
static void Free(void* ptr);
|
| 28 |
+
};
|
| 29 |
+
|
| 30 |
+
}
|
mosesdecoder/moses2/Phrase.h
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* PhraseImpl.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
|
| 10 |
+
#include <cstddef>
|
| 11 |
+
#include <string>
|
| 12 |
+
#include <sstream>
|
| 13 |
+
#include <iostream>
|
| 14 |
+
#include "Word.h"
|
| 15 |
+
#include "MemPool.h"
|
| 16 |
+
#include "TypeDef.h"
|
| 17 |
+
#include "legacy/FactorCollection.h"
|
| 18 |
+
#include "SCFG/Word.h"
|
| 19 |
+
#include <boost/functional/hash.hpp>
|
| 20 |
+
|
| 21 |
+
namespace Moses2
|
| 22 |
+
{
|
| 23 |
+
|
| 24 |
+
template<typename WORD>
|
| 25 |
+
class SubPhrase;
|
| 26 |
+
|
| 27 |
+
class Scores;
|
| 28 |
+
class PhraseTable;
|
| 29 |
+
class MemPool;
|
| 30 |
+
class System;
|
| 31 |
+
|
| 32 |
+
template<typename WORD>
|
| 33 |
+
class Phrase
|
| 34 |
+
{
|
| 35 |
+
public:
|
| 36 |
+
virtual ~Phrase() {
|
| 37 |
+
}
|
| 38 |
+
virtual const WORD& operator[](size_t pos) const = 0;
|
| 39 |
+
virtual size_t GetSize() const = 0;
|
| 40 |
+
|
| 41 |
+
virtual const WORD& Back() const {
|
| 42 |
+
assert(GetSize());
|
| 43 |
+
return (*this)[GetSize() - 1];
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
virtual size_t hash() const {
|
| 47 |
+
size_t seed = 0;
|
| 48 |
+
|
| 49 |
+
for (size_t i = 0; i < GetSize(); ++i) {
|
| 50 |
+
const WORD &word = (*this)[i];
|
| 51 |
+
size_t wordHash = word.hash();
|
| 52 |
+
boost::hash_combine(seed, wordHash);
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
return seed;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
virtual bool operator==(const Phrase &compare) const {
|
| 59 |
+
if (GetSize() != compare.GetSize()) {
|
| 60 |
+
return false;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
for (size_t i = 0; i < GetSize(); ++i) {
|
| 64 |
+
const WORD &word = (*this)[i];
|
| 65 |
+
const WORD &otherWord = compare[i];
|
| 66 |
+
if (word != otherWord) {
|
| 67 |
+
return false;
|
| 68 |
+
}
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
return true;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
virtual bool operator!=(const Phrase &compare) const {
|
| 75 |
+
return !((*this) == compare);
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
virtual std::string GetString(const FactorList &factorTypes) const {
|
| 79 |
+
if (GetSize() == 0) {
|
| 80 |
+
return "";
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
std::stringstream ret;
|
| 84 |
+
|
| 85 |
+
const WORD &word = (*this)[0];
|
| 86 |
+
ret << word.GetString(factorTypes);
|
| 87 |
+
for (size_t i = 1; i < GetSize(); ++i) {
|
| 88 |
+
const WORD &word = (*this)[i];
|
| 89 |
+
ret << " " << word.GetString(factorTypes);
|
| 90 |
+
}
|
| 91 |
+
return ret.str();
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
|
| 95 |
+
|
| 96 |
+
virtual std::string Debug(const System &system) const {
|
| 97 |
+
std::stringstream out;
|
| 98 |
+
size_t size = GetSize();
|
| 99 |
+
if (size) {
|
| 100 |
+
out << (*this)[0].Debug(system);
|
| 101 |
+
for (size_t i = 1; i < size; ++i) {
|
| 102 |
+
const WORD &word = (*this)[i];
|
| 103 |
+
out << " " << word.Debug(system);
|
| 104 |
+
}
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
return out.str();
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
virtual void OutputToStream(const System &system, std::ostream &out) const {
|
| 111 |
+
size_t size = GetSize();
|
| 112 |
+
if (size) {
|
| 113 |
+
(*this)[0].OutputToStream(system, out);
|
| 114 |
+
for (size_t i = 1; i < size; ++i) {
|
| 115 |
+
const WORD &word = (*this)[i];
|
| 116 |
+
out << " ";
|
| 117 |
+
word.OutputToStream(system, out);
|
| 118 |
+
}
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
};
|
| 124 |
+
|
| 125 |
+
////////////////////////////////////////////////////////////////////////
|
| 126 |
+
template<typename WORD>
|
| 127 |
+
class PhraseOrdererLexical
|
| 128 |
+
{
|
| 129 |
+
public:
|
| 130 |
+
bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const {
|
| 131 |
+
size_t minSize = std::min(a.GetSize(), b.GetSize());
|
| 132 |
+
for (size_t i = 0; i < minSize; ++i) {
|
| 133 |
+
const Word &aWord = a[i];
|
| 134 |
+
const Word &bWord = b[i];
|
| 135 |
+
int cmp = aWord.Compare(bWord);
|
| 136 |
+
//std::cerr << "WORD: " << aWord << " ||| " << bWord << " ||| " << lessThan << std::endl;
|
| 137 |
+
if (cmp) {
|
| 138 |
+
return (cmp < 0);
|
| 139 |
+
}
|
| 140 |
+
}
|
| 141 |
+
return a.GetSize() < b.GetSize();
|
| 142 |
+
}
|
| 143 |
+
};
|
| 144 |
+
|
| 145 |
+
}
|
| 146 |
+
|
mosesdecoder/moses2/Recycler.cpp
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Recycler.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 2 Jan 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "Recycler.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
|
| 13 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/Scores.h
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Scores.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#pragma once
|
| 9 |
+
#include <iostream>
|
| 10 |
+
#include <string>
|
| 11 |
+
#include "TypeDef.h"
|
| 12 |
+
#include "MemPool.h"
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class FeatureFunction;
|
| 18 |
+
class FeatureFunctions;
|
| 19 |
+
class System;
|
| 20 |
+
|
| 21 |
+
class Scores
|
| 22 |
+
{
|
| 23 |
+
public:
|
| 24 |
+
Scores(const System &system, MemPool &pool, size_t numScores);
|
| 25 |
+
Scores(const System &system, MemPool &pool, size_t numScores,
|
| 26 |
+
const Scores &origScores);
|
| 27 |
+
|
| 28 |
+
virtual ~Scores();
|
| 29 |
+
|
| 30 |
+
SCORE GetTotalScore() const {
|
| 31 |
+
return m_total;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
const SCORE *GetScores(const FeatureFunction &featureFunction) const;
|
| 35 |
+
|
| 36 |
+
void Reset(const System &system);
|
| 37 |
+
|
| 38 |
+
void CreateFromString(const std::string &str,
|
| 39 |
+
const FeatureFunction &featureFunction, const System &system,
|
| 40 |
+
bool transformScores);
|
| 41 |
+
|
| 42 |
+
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
|
| 43 |
+
const SCORE &score);
|
| 44 |
+
|
| 45 |
+
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
|
| 46 |
+
const SCORE &score, size_t offset);
|
| 47 |
+
|
| 48 |
+
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
|
| 49 |
+
const std::vector<SCORE> &scores);
|
| 50 |
+
|
| 51 |
+
void PlusEquals(const System &system, const FeatureFunction &featureFunction,
|
| 52 |
+
SCORE scores[]);
|
| 53 |
+
|
| 54 |
+
void PlusEquals(const System &system, const Scores &scores);
|
| 55 |
+
|
| 56 |
+
void MinusEquals(const System &system, const Scores &scores);
|
| 57 |
+
|
| 58 |
+
void Assign(const System &system, const FeatureFunction &featureFunction,
|
| 59 |
+
const SCORE &score);
|
| 60 |
+
|
| 61 |
+
void Assign(const System &system, const FeatureFunction &featureFunction,
|
| 62 |
+
const std::vector<SCORE> &scores);
|
| 63 |
+
|
| 64 |
+
std::string Debug(const System &system) const;
|
| 65 |
+
|
| 66 |
+
void OutputBreakdown(std::ostream &out, const System &system) const;
|
| 67 |
+
|
| 68 |
+
// static functions to work out estimated scores
|
| 69 |
+
static SCORE CalcWeightedScore(const System &system,
|
| 70 |
+
const FeatureFunction &featureFunction, SCORE scores[]);
|
| 71 |
+
|
| 72 |
+
static SCORE CalcWeightedScore(const System &system,
|
| 73 |
+
const FeatureFunction &featureFunction, SCORE score);
|
| 74 |
+
|
| 75 |
+
protected:
|
| 76 |
+
SCORE *m_scores;
|
| 77 |
+
SCORE m_total;
|
| 78 |
+
};
|
| 79 |
+
|
| 80 |
+
}
|
| 81 |
+
|
mosesdecoder/moses2/SubPhrase.h
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <sstream>
|
| 3 |
+
#include "Phrase.h"
|
| 4 |
+
#include "Word.h"
|
| 5 |
+
#include "SCFG/Word.h"
|
| 6 |
+
|
| 7 |
+
namespace Moses2
|
| 8 |
+
{
|
| 9 |
+
class System;
|
| 10 |
+
|
| 11 |
+
template<typename WORD>
|
| 12 |
+
class SubPhrase: public Phrase<WORD>
|
| 13 |
+
{
|
| 14 |
+
public:
|
| 15 |
+
SubPhrase(const Phrase<WORD> &origPhrase, size_t start, size_t size)
|
| 16 |
+
:m_origPhrase(&origPhrase)
|
| 17 |
+
,m_start(start)
|
| 18 |
+
,m_size(size)
|
| 19 |
+
{}
|
| 20 |
+
|
| 21 |
+
virtual const WORD& operator[](size_t pos) const {
|
| 22 |
+
return (*m_origPhrase)[pos + m_start];
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
virtual size_t GetSize() const {
|
| 26 |
+
return m_size;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
SubPhrase GetSubPhrase(size_t start, size_t size) const {
|
| 30 |
+
SubPhrase ret(*m_origPhrase, m_start + start, size);
|
| 31 |
+
return ret;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
virtual std::string Debug(const System &system) const {
|
| 35 |
+
std::stringstream out;
|
| 36 |
+
if (GetSize()) {
|
| 37 |
+
out << (*this)[0].Debug(system);
|
| 38 |
+
for (size_t i = 1; i < GetSize(); ++i) {
|
| 39 |
+
const WORD &word = (*this)[i];
|
| 40 |
+
out << " " << word.Debug(system);
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
return out.str();
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
protected:
|
| 48 |
+
const Phrase<WORD> *m_origPhrase;
|
| 49 |
+
size_t m_start, m_size;
|
| 50 |
+
};
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
}
|
| 54 |
+
|
mosesdecoder/moses2/TargetPhrase.cpp
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* TargetPhrase.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 26 Apr 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "TargetPhrase.h"
|
| 9 |
+
#include "System.h"
|
| 10 |
+
#include "Scores.h"
|
| 11 |
+
|
| 12 |
+
namespace Moses2
|
| 13 |
+
{
|
| 14 |
+
|
| 15 |
+
} /* namespace Moses2 */
|
mosesdecoder/moses2/TranslationTask.cpp
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "TranslationTask.h"
|
| 2 |
+
#include "System.h"
|
| 3 |
+
#include "InputType.h"
|
| 4 |
+
#include "PhraseBased/Manager.h"
|
| 5 |
+
#include "SCFG/Manager.h"
|
| 6 |
+
|
| 7 |
+
using namespace std;
|
| 8 |
+
|
| 9 |
+
namespace Moses2
|
| 10 |
+
{
|
| 11 |
+
|
| 12 |
+
TranslationTask::TranslationTask(System &system,
|
| 13 |
+
const std::string &line,
|
| 14 |
+
long translationId)
|
| 15 |
+
{
|
| 16 |
+
if (system.isPb) {
|
| 17 |
+
m_mgr = new Manager(system, *this, line, translationId);
|
| 18 |
+
} else {
|
| 19 |
+
m_mgr = new SCFG::Manager(system, *this, line, translationId);
|
| 20 |
+
}
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
TranslationTask::~TranslationTask()
|
| 24 |
+
{
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
std::string TranslationTask::ReturnTranslation(bool nbest) const
|
| 28 |
+
{
|
| 29 |
+
m_mgr->Decode();
|
| 30 |
+
string out;
|
| 31 |
+
if (nbest) {
|
| 32 |
+
out = m_mgr->OutputNBest() + "\n";
|
| 33 |
+
}
|
| 34 |
+
else {
|
| 35 |
+
out = m_mgr->OutputBest() + "\n";
|
| 36 |
+
}
|
| 37 |
+
delete m_mgr;
|
| 38 |
+
return out;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
void TranslationTask::Run()
|
| 42 |
+
{
|
| 43 |
+
|
| 44 |
+
m_mgr->Decode();
|
| 45 |
+
|
| 46 |
+
string out;
|
| 47 |
+
|
| 48 |
+
out = m_mgr->OutputBest() + "\n";
|
| 49 |
+
m_mgr->system.bestCollector->Write(m_mgr->GetTranslationId(), out);
|
| 50 |
+
|
| 51 |
+
if (m_mgr->system.options.nbest.nbest_size) {
|
| 52 |
+
out = m_mgr->OutputNBest();
|
| 53 |
+
m_mgr->system.nbestCollector->Write(m_mgr->GetTranslationId(), out);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
if (!m_mgr->system.options.output.detailed_transrep_filepath.empty()) {
|
| 57 |
+
out = m_mgr->OutputTransOpt();
|
| 58 |
+
m_mgr->system.detailedTranslationCollector->Write(m_mgr->GetTranslationId(), out);
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
delete m_mgr;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
}
|
| 65 |
+
|
mosesdecoder/moses2/TrellisPaths.h
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* TrellisPaths.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 16 Mar 2016
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
#include <vector>
|
| 10 |
+
#include <queue>
|
| 11 |
+
#include "PhraseBased/TrellisPath.h"
|
| 12 |
+
|
| 13 |
+
namespace Moses2
|
| 14 |
+
{
|
| 15 |
+
|
| 16 |
+
template<typename T>
|
| 17 |
+
struct CompareTrellisPath {
|
| 18 |
+
bool operator()(const T* pathA, const T* pathB) const {
|
| 19 |
+
return (pathA->GetFutureScore() < pathB->GetFutureScore());
|
| 20 |
+
}
|
| 21 |
+
};
|
| 22 |
+
|
| 23 |
+
template<typename T>
|
| 24 |
+
class TrellisPaths
|
| 25 |
+
{
|
| 26 |
+
public:
|
| 27 |
+
TrellisPaths() {}
|
| 28 |
+
|
| 29 |
+
virtual ~TrellisPaths() {
|
| 30 |
+
while (!empty()) {
|
| 31 |
+
T *path = Get();
|
| 32 |
+
delete path;
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
bool empty() const {
|
| 37 |
+
return m_coll.empty();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
//! add a new entry into collection
|
| 41 |
+
void Add(T *trellisPath) {
|
| 42 |
+
m_coll.push(trellisPath);
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
T *Get() {
|
| 46 |
+
T *top = m_coll.top();
|
| 47 |
+
|
| 48 |
+
// Detach
|
| 49 |
+
m_coll.pop();
|
| 50 |
+
return top;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
size_t GetSize() const {
|
| 54 |
+
return m_coll.size();
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
protected:
|
| 58 |
+
typedef std::priority_queue<T*, std::vector<T*>,
|
| 59 |
+
CompareTrellisPath<T> > CollectionType;
|
| 60 |
+
CollectionType m_coll;
|
| 61 |
+
};
|
| 62 |
+
|
| 63 |
+
} /* namespace Moses2 */
|
| 64 |
+
|
mosesdecoder/moses2/TypeDef.h
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* TypeDef.h
|
| 3 |
+
*
|
| 4 |
+
* Created on: 23 Oct 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
#pragma once
|
| 8 |
+
|
| 9 |
+
#include <cstddef>
|
| 10 |
+
#include <vector>
|
| 11 |
+
#include <istream>
|
| 12 |
+
#include "Vector.h"
|
| 13 |
+
|
| 14 |
+
namespace Moses2
|
| 15 |
+
{
|
| 16 |
+
|
| 17 |
+
class HypothesisBase;
|
| 18 |
+
|
| 19 |
+
#define NOT_FOUND std::numeric_limits<size_t>::max()
|
| 20 |
+
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
|
| 21 |
+
const size_t DEFAULT_MAX_CHART_SPAN = 20;
|
| 22 |
+
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
|
| 23 |
+
const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
|
| 24 |
+
const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
|
| 25 |
+
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
|
| 26 |
+
|
| 27 |
+
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
|
| 28 |
+
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
|
| 29 |
+
const float LOWEST_SCORE = -100.0f;
|
| 30 |
+
|
| 31 |
+
const float DEFAULT_BEAM_WIDTH = 0.00001f;
|
| 32 |
+
const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
|
| 33 |
+
const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;
|
| 34 |
+
|
| 35 |
+
#ifndef BOS_
|
| 36 |
+
#define BOS_ "<s>" //Beginning of sentence symbol
|
| 37 |
+
#endif
|
| 38 |
+
#ifndef EOS_
|
| 39 |
+
#define EOS_ "</s>" //End of sentence symbol
|
| 40 |
+
#endif
|
| 41 |
+
|
| 42 |
+
typedef size_t FactorType;
|
| 43 |
+
typedef float SCORE;
|
| 44 |
+
typedef std::vector<FactorType> FactorList;
|
| 45 |
+
|
| 46 |
+
// Note: StaticData uses SearchAlgorithm to determine whether the translation
|
| 47 |
+
// model is phrase-based or syntax-based. If you add a syntax-based search
|
| 48 |
+
// algorithm here then you should also update StaticData::IsSyntax().
|
| 49 |
+
enum SearchAlgorithm {
|
| 50 |
+
Normal = 0, CubePruning = 1,
|
| 51 |
+
//,CubeGrowing = 2
|
| 52 |
+
CYKPlus = 3,
|
| 53 |
+
NormalBatch = 4,
|
| 54 |
+
ChartIncremental = 5,
|
| 55 |
+
SyntaxS2T = 6,
|
| 56 |
+
SyntaxT2S = 7,
|
| 57 |
+
SyntaxT2S_SCFG = 8,
|
| 58 |
+
SyntaxF2S = 9,
|
| 59 |
+
CubePruningPerMiniStack = 10,
|
| 60 |
+
CubePruningPerBitmap = 11,
|
| 61 |
+
CubePruningCardinalStack = 12,
|
| 62 |
+
CubePruningBitmapStack = 13,
|
| 63 |
+
CubePruningMiniStack = 14,
|
| 64 |
+
DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
enum InputTypeEnum {
|
| 68 |
+
SentenceInput = 0,
|
| 69 |
+
ConfusionNetworkInput = 1,
|
| 70 |
+
WordLatticeInput = 2,
|
| 71 |
+
TreeInputType = 3,
|
| 72 |
+
//,WordLatticeInput2 = 4,
|
| 73 |
+
TabbedSentenceInput = 5,
|
| 74 |
+
ForestInputType = 6,
|
| 75 |
+
SentenceInputWithCandidates = 7,
|
| 76 |
+
};
|
| 77 |
+
|
| 78 |
+
enum XmlInputType {
|
| 79 |
+
XmlPassThrough = 0,
|
| 80 |
+
XmlIgnore = 1,
|
| 81 |
+
XmlExclusive = 2,
|
| 82 |
+
XmlInclusive = 3,
|
| 83 |
+
XmlConstraint = 4
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
+
enum WordAlignmentSort {
|
| 87 |
+
NoSort = 0,
|
| 88 |
+
TargetOrder = 1
|
| 89 |
+
};
|
| 90 |
+
|
| 91 |
+
enum S2TParsingAlgorithm {
|
| 92 |
+
RecursiveCYKPlus,
|
| 93 |
+
Scope3
|
| 94 |
+
};
|
| 95 |
+
|
| 96 |
+
enum SourceLabelOverlap {
|
| 97 |
+
SourceLabelOverlapAdd = 0,
|
| 98 |
+
SourceLabelOverlapReplace = 1,
|
| 99 |
+
SourceLabelOverlapDiscard = 2
|
| 100 |
+
};
|
| 101 |
+
|
| 102 |
+
/////////////////////////
|
| 103 |
+
// MOSES2 only
|
| 104 |
+
|
| 105 |
+
class StackAdd
|
| 106 |
+
{
|
| 107 |
+
public:
|
| 108 |
+
bool added;
|
| 109 |
+
HypothesisBase *other;
|
| 110 |
+
|
| 111 |
+
StackAdd() {
|
| 112 |
+
}
|
| 113 |
+
StackAdd(bool vadded, HypothesisBase *vOther) :
|
| 114 |
+
added(vadded), other(vOther) {
|
| 115 |
+
}
|
| 116 |
+
};
|
| 117 |
+
|
| 118 |
+
class Hypothesis;
|
| 119 |
+
typedef Vector<Hypothesis*> Batch;
|
| 120 |
+
|
| 121 |
+
class Factor;
|
| 122 |
+
typedef std::vector<const Factor*> Context;
|
| 123 |
+
|
| 124 |
+
}
|
| 125 |
+
|
mosesdecoder/moses2/Vector.cpp
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* Vector.cpp
|
| 3 |
+
*
|
| 4 |
+
* Created on: 7 Dec 2015
|
| 5 |
+
* Author: hieu
|
| 6 |
+
*/
|
| 7 |
+
|
| 8 |
+
#include "Vector.h"
|
| 9 |
+
|
| 10 |
+
namespace Moses2
|
| 11 |
+
{
|
| 12 |
+
|
| 13 |
+
}
|
| 14 |
+
|