diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp b/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e4d1641ffd2f6a9f94cbd42baef5db2b2609e72
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
@@ -0,0 +1,94 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifdef HAVE_CMPH
+
+#include "CmphStringVectorAdapter.h"
+
+namespace Moses
+{
+
+// cmph "dispose" callback for StringVector-backed key sources: releases a key
+// buffer that the read callback allocated with new[]. `data` and `keylen`
+// belong to the callback signature but are not needed here.
+void CmphStringVectorAdapterDispose(void * /*data*/, char *key, cmph_uint32 /*keylen*/)
+{
+  delete [] key;
+}
+
+// cmph "rewind" callback for StringVector-backed key sources: moves the
+// cursor stored in `data` (a cmph_vector_t) back to the first key.
+void CmphStringVectorAdapterRewind(void *data)
+{
+  static_cast<cmph_vector_t *>(data)->position = 0;
+}
+
+//************************************************************************//
+
+// Allocates (with malloc) a cmph_io_adapter_t whose `data` member points at a
+// freshly malloc'ed cmph_vector_t cursor over `v`. Only `data` and `nkeys`
+// are filled in here; the read/dispose/rewind callbacks are left for the
+// caller to set (CmphVectorAdapter does so). `v` is referenced, not copied,
+// so it has to stay alive for as long as the adapter is used.
+cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v)
+{
+  cmph_io_adapter_t *adapter =
+    static_cast<cmph_io_adapter_t *>(malloc(sizeof(cmph_io_adapter_t)));
+  cmph_vector_t *cursor =
+    static_cast<cmph_vector_t *>(malloc(sizeof(cmph_vector_t)));
+  assert(adapter);
+  assert(cursor);
+
+  // Cursor starts at the first key of the borrowed vector.
+  cursor->position = 0;
+  cursor->vector = static_cast<void *>(&v);
+
+  adapter->nkeys = v.size();
+  adapter->data = static_cast<void *>(cursor);
+  return adapter;
+}
+
+// cmph "read" callback for std::vector<std::string> key sources.
+//
+// data   - the cmph_vector_t cursor created by CmphVectorAdapterNew.
+// key    - receives a new[]-allocated, NUL-terminated copy of the current
+//          key; it is released by CmphVectorAdapterDispose (delete[]).
+// keylen - receives the key length in bytes (terminator not counted).
+// Returns the key length and advances the cursor by one key.
+//
+// The key bytes are copied with memcpy using the string's real length, so a
+// key containing an embedded '\0' is copied completely instead of leaving the
+// tail of the buffer uninitialised (which is what strcpy would do).
+int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+{
+  cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+  const std::vector<std::string>& keys =
+    *(std::vector<std::string>*)cmph_vector->vector;
+
+  // Reading past the last key is a caller error (rewind was not called).
+  assert(cmph_vector->position < keys.size());
+
+  // Bind by reference: the only copy made is the one handed to cmph.
+  const std::string& current = keys[cmph_vector->position];
+  const size_t size = current.size();
+
+  *keylen = size;
+  *key = new char[size + 1];
+  std::memcpy(*key, current.data(), size);
+  (*key)[size] = '\0';
+
+  cmph_vector->position = cmph_vector->position + 1;
+  return (int)(*keylen);
+}
+
+// cmph "dispose" callback for std::vector<std::string> key sources: frees a
+// key buffer produced by CmphVectorAdapterRead (allocated with new[]).
+// `data` and `keylen` are unused.
+void CmphVectorAdapterDispose(void * /*data*/, char *key, cmph_uint32 /*keylen*/)
+{
+  delete [] key;
+}
+
+// cmph "rewind" callback for std::vector<std::string> key sources: resets the
+// cursor in `data` (a cmph_vector_t) so the next read returns the first key.
+void CmphVectorAdapterRewind(void *data)
+{
+  static_cast<cmph_vector_t *>(data)->position = 0;
+}
+
+// Builds a ready-to-use cmph key source over `v`: allocates the adapter via
+// CmphVectorAdapterNew and installs the three std::vector callbacks.
+// `v` is borrowed, so it must outlive the returned adapter.
+cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v)
+{
+  cmph_io_adapter_t *adapter = CmphVectorAdapterNew(v);
+
+  adapter->rewind = CmphVectorAdapterRewind;
+  adapter->dispose = CmphVectorAdapterDispose;
+  adapter->read = CmphVectorAdapterRead;
+
+  return adapter;
+}
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.h b/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a532c289d3c2b5d8ceb511667e0d5c20ef8770f
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/CmphStringVectorAdapter.h
@@ -0,0 +1,105 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_CmphStringVectorAdapterNew_h
+#define moses_CmphStringVectorAdapterNew_h
+
+#include <cassert>
+#include <cstring>
+
+#ifdef HAVE_CMPH
+#include "cmph.h"
+
+#include "StringVector.h"
+
+namespace Moses
+{
+
+// Cursor handed to the cmph callbacks through cmph_io_adapter_t::data.
+typedef struct {
+  // Type-erased pointer to the key container (a StringVector<...> or a
+  // std::vector<std::string>); the read callbacks cast it back.
+  void *vector;
+  // Index of the key the next read callback call will return.
+  cmph_uint32 position;
+}
+cmph_vector_t;
+
+
+// Allocates (with malloc) a cmph_io_adapter_t plus a cmph_vector_t cursor
+// positioned at the first entry of `sv`. Only `data` and `nkeys` are set;
+// the callbacks are installed by CmphStringVectorAdapter. `sv` is stored by
+// address, so it must outlive the returned adapter.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+cmph_io_adapter_t *CmphStringVectorAdapterNew(StringVector<ValueT, PosT, Allocator>& sv)
+{
+  cmph_io_adapter_t *adapter =
+    static_cast<cmph_io_adapter_t *>(malloc(sizeof(cmph_io_adapter_t)));
+  cmph_vector_t *cursor =
+    static_cast<cmph_vector_t *>(malloc(sizeof(cmph_vector_t)));
+  assert(adapter);
+  assert(cursor);
+
+  cursor->position = 0;
+  cursor->vector = static_cast<void *>(&sv);
+
+  adapter->nkeys = sv.size();
+  adapter->data = static_cast<void *>(cursor);
+  return adapter;
+}
+
+// cmph "read" callback for StringVector-backed key sources.
+//
+// data   - the cmph_vector_t cursor created by CmphStringVectorAdapterNew.
+// key    - receives a new[]-allocated, NUL-terminated copy of the current
+//          entry; released by CmphStringVectorAdapterDispose (delete[]).
+// keylen - receives the entry length in bytes (terminator not counted).
+// Returns the entry length and advances the cursor by one entry.
+//
+// The entry is materialised as a std::string once and copied with memcpy
+// using that string's length, so entries containing an embedded '\0' are
+// copied in full rather than being cut short by strcpy.
+// NOTE(review): this assumes the std::string conversion of an entry has the
+// same length as the entry's size() - confirm against StringVector.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+int CmphStringVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+{
+  cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+  StringVector<ValueT, PosT, Allocator>* sv = (StringVector<ValueT, PosT, Allocator>*)cmph_vector->vector;
+
+  const std::string temp = (*sv)[cmph_vector->position];
+  const size_t size = temp.size();
+
+  *keylen = size;
+  *key = new char[size + 1];
+  std::memcpy(*key, temp.data(), size);
+  (*key)[size] = '\0';
+
+  cmph_vector->position = cmph_vector->position + 1;
+  return (int)(*keylen);
+}
+
+// cmph "dispose" callback for StringVector key sources: delete[]s a key
+// buffer returned by CmphStringVectorAdapterRead. Defined in the .cpp file.
+void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+// cmph "rewind" callback for StringVector key sources: resets the cursor in
+// `data` to position 0. Defined in the .cpp file.
+void CmphStringVectorAdapterRewind(void *data);
+
+// Builds a ready-to-use cmph key source over `sv`: allocates the adapter via
+// CmphStringVectorAdapterNew and installs the StringVector callbacks (the
+// read callback is instantiated for this StringVector type).
+// `sv` is borrowed, so it must outlive the returned adapter.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+cmph_io_adapter_t* CmphStringVectorAdapter(StringVector<ValueT, PosT, Allocator>& sv)
+{
+  cmph_io_adapter_t *adapter = CmphStringVectorAdapterNew(sv);
+
+  adapter->rewind = CmphStringVectorAdapterRewind;
+  adapter->dispose = CmphStringVectorAdapterDispose;
+  adapter->read = CmphStringVectorAdapterRead<ValueT, PosT, Allocator>;
+
+  return adapter;
+}
+
+//************************************************************************//
+
+// Allocates an adapter and cursor over `v`; sets only `data` and `nkeys`.
+cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v);
+
+// cmph "read" callback: hands out a new[]-allocated copy of the current key
+// in *key, its length in *keylen, and advances the cursor.
+int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen);
+
+// cmph "dispose" callback: delete[]s a key returned by CmphVectorAdapterRead.
+void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+// cmph "rewind" callback: resets the cursor in `data` to position 0.
+void CmphVectorAdapterRewind(void *data);
+
+// Convenience factory: CmphVectorAdapterNew plus the three callbacks above.
+// `v` is borrowed and must outlive the returned adapter.
+cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v);
+
+}
+
+#endif
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..484f6c8c1140aae473222ce78cd646ab66b7b870
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
@@ -0,0 +1,195 @@
+// -*- c++ -*-
+// vim:tabstop=2
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "LexicalReorderingTableCompact.h"
+#include "moses/parameters/OOVHandlingOptions.h"
+
+namespace Moses
+{
+// Process-wide default for m_inMemory; overwritten by
+// SetStaticDefaultParameters from the "minlexr-memory" parameter.
+bool LexicalReorderingTableCompact::s_inMemoryByDefault = false;
+
+// Constructs the table and immediately loads the compact binary file at
+// `filePath`. The score-layout members start from the same defaults as the
+// non-loading constructor (6 score components, one tree per component) and
+// are then overwritten by Load() with the values stored in the file.
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+  const std::string& filePath,
+  const std::vector<FactorType>& f_factors,
+  const std::vector<FactorType>& e_factors,
+  const std::vector<FactorType>& c_factors)
+  : LexicalReorderingTable(f_factors, e_factors, c_factors),
+    m_inMemory(s_inMemoryByDefault),
+    m_numScoreComponent(6),
+    m_multipleScoreTrees(true),
+    m_hash(10, 16),
+    m_scoreTrees(1)
+{
+  Load(filePath);
+}
+
+// Constructs an empty table without touching the disk; Load() can be called
+// later. Defaults: in-memory flag taken from s_inMemoryByDefault, 6 score
+// components, one Huffman tree per component, and a single (null) tree slot.
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+  const std::vector<FactorType>& f_factors,
+  const std::vector<FactorType>& e_factors,
+  const std::vector<FactorType>& c_factors)
+  : LexicalReorderingTable(f_factors, e_factors, c_factors),
+    m_inMemory(s_inMemoryByDefault),
+    m_numScoreComponent(6),
+    m_multipleScoreTrees(true),
+    m_hash(10, 16),
+    m_scoreTrees(1)
+{
+}
+
+// Destroys every Huffman tree owned through m_scoreTrees (null slots are
+// harmless to delete).
+LexicalReorderingTableCompact::~LexicalReorderingTableCompact()
+{
+  typedef std::vector<ScoreTree*>::iterator TreeIter;
+  for(TreeIter tree = m_scoreTrees.begin(); tree != m_scoreTrees.end(); ++tree)
+    delete *tree;
+}
+
+// Looks up the reordering scores for source phrase `f`, target phrase `e`
+// and context `c`.
+//
+// With a non-empty context the lookup backs off from the longest context to
+// shorter suffixes (dropping leading words one at a time) and returns the
+// scores of the first key present in the hash. Previously the loop only
+// rebuilt the key on every iteration and never queried the hash, so just the
+// final (shortest) key was ever looked up.
+//
+// Returns m_numScoreComponent decoded scores, or an empty Scores() when no
+// key is found.
+std::vector<float>
+LexicalReorderingTableCompact::
+GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
+{
+  // The hash reports a missing key by returning its own size.
+  const size_t notFound = m_hash.GetSize();
+  size_t index = notFound;
+
+  if(0 == c.GetSize()) {
+    index = m_hash[MakeKey(f, e, c)];
+  } else {
+    // i == c.GetSize() is kept from the original loop bounds; it presumably
+    // yields an empty context as the last fallback - TODO confirm.
+    for(size_t i = 0; i <= c.GetSize() && index == notFound; ++i) {
+      Phrase sub_c(c.GetSubString(Range(i,c.GetSize()-1)));
+      index = m_hash[MakeKey(f, e, sub_c)];
+    }
+  }
+
+  if(index == notFound)
+    return Scores();
+
+  // Fetch the packed score string from whichever store Load() populated.
+  std::string scoresString;
+  if(m_inMemory)
+    scoresString = m_scoresMemory[index].str();
+  else
+    scoresString = m_scoresMapped[index].str();
+
+  // Decode one Huffman symbol per score component; with a shared tree every
+  // component is read through tree 0.
+  Scores scores;
+  BitWrapper<> bitStream(scoresString);
+  for(size_t i = 0; i < m_numScoreComponent; i++)
+    scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
+
+  return scores;
+}
+
+// Builds the hash key for a (source, target, context) phrase triple by
+// rendering each phrase with its configured factors, trimming it, and
+// delegating to the string overload.
+std::string
+LexicalReorderingTableCompact::
+MakeKey(const Phrase& f,
+        const Phrase& e,
+        const Phrase& c) const
+{
+  const std::string source  = Trim(f.GetStringRep(m_FactorsF));
+  const std::string target  = Trim(e.GetStringRep(m_FactorsE));
+  const std::string context = Trim(c.GetStringRep(m_FactorsC));
+  return MakeKey(source, target, context);
+}
+
+// Joins the already-rendered phrases into a hash key.
+// `f` is always used; `e` and `c` are appended only when the corresponding
+// factor list (m_FactorsE / m_FactorsC) is non-empty. Parts are separated by
+// " ||| " and the key always ends with a trailing " ||| ".
+std::string
+LexicalReorderingTableCompact::
+MakeKey(const std::string& f,
+        const std::string& e,
+        const std::string& c) const
+{
+  const std::string separator(" ||| ");
+
+  std::string key(f);
+  if(!m_FactorsE.empty()) {
+    if(!key.empty())
+      key.append(separator);
+    key.append(e);
+  }
+  if(!m_FactorsC.empty()) {
+    if(!key.empty())
+      key.append(separator);
+    key.append(c);
+  }
+  key.append(separator);
+  return key;
+}
+
+// Returns a newly allocated compact table if a ".minlexr" file can be found
+// for `filePath`, otherwise 0 (also 0 when built without HAVE_CMPH).
+//
+// Two spellings are accepted: `filePath` without the suffix (the suffix is
+// appended and the result probed first) and `filePath` already ending in
+// ".minlexr". The caller owns the returned object.
+LexicalReorderingTable*
+LexicalReorderingTableCompact::
+CheckAndLoad
+(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+{
+#ifdef HAVE_CMPH
+  const std::string minlexr = ".minlexr";
+  // file name is specified without suffix
+  if(FileExists(filePath + minlexr)) {
+    //there exists a compact binary version use that
+    VERBOSE(2,"Using compact lexical reordering table" << std::endl);
+    return new LexicalReorderingTableCompact(filePath + minlexr, f_factors, e_factors, c_factors);
+  }
+  // file name is specified with suffix
+  // Check the length first: for a path shorter than the suffix the unsigned
+  // subtraction would wrap around and substr/compare would throw
+  // std::out_of_range.
+  const bool hasSuffix =
+    filePath.length() >= minlexr.length()
+    && filePath.compare(filePath.length() - minlexr.length(),
+                        minlexr.length(), minlexr) == 0;
+  if(hasSuffix && FileExists(filePath)) {
+    //there exists a compact binary version use that
+    VERBOSE(2,"Using compact lexical reordering table" << std::endl);
+    return new LexicalReorderingTableCompact(filePath, f_factors, e_factors, c_factors);
+  }
+#endif
+  return 0;
+}
+
+// Reads a compact lexical reordering table from `filePath`.
+//
+// Data is consumed in this order: the hash index, m_numScoreComponent,
+// m_multipleScoreTrees, one Huffman tree per score component (or a single
+// shared tree), and finally the packed score strings, which go either into
+// m_scoresMemory or into the mmap-backed m_scoresMapped depending on
+// m_inMemory.
+//
+// Throws (via UTIL_THROW_IF2) if the file cannot be opened or if the two
+// header fields cannot be read completely.
+void
+LexicalReorderingTableCompact::
+Load(std::string filePath)
+{
+  // The file holds binary data, so open it in binary mode. This makes no
+  // difference on POSIX but prevents newline translation on other platforms.
+  std::FILE* pFile = std::fopen(filePath.c_str(), "rb");
+  UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
+
+  //if(m_inMemory)
+  m_hash.Load(pFile);
+  //else
+  //m_hash.LoadIndex(pFile);
+
+  size_t read = 0;
+  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
+  read += std::fread(&m_multipleScoreTrees,
+                     sizeof(m_multipleScoreTrees), 1, pFile);
+
+  // Each fread returns 1 on success. Anything else means the header is
+  // truncated and the members hold garbage, so stop instead of allocating
+  // trees from it.
+  const bool headerComplete = (read == 2);
+  if(!headerComplete)
+    std::fclose(pFile);
+  UTIL_THROW_IF2(!headerComplete,
+                 "File " << filePath << " is truncated: could not read score header");
+
+  // Drop whatever the vector currently holds (the constructor's null slot or
+  // the trees of an earlier Load) so that reloading does not leak.
+  for(size_t i = 0; i < m_scoreTrees.size(); i++)
+    delete m_scoreTrees[i];
+  m_scoreTrees.clear();
+
+  if(m_multipleScoreTrees) {
+    m_scoreTrees.resize(m_numScoreComponent);
+    for(size_t i = 0; i < m_numScoreComponent; i++)
+      m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
+  } else {
+    m_scoreTrees.resize(1);
+    m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
+  }
+
+  if(m_inMemory)
+    m_scoresMemory.load(pFile, false);
+  else
+    m_scoresMapped.load(pFile, true);
+
+  // NOTE(review): pFile is deliberately left open, as before. Whether the
+  // memory-mapped store still needs the handle is not visible from here -
+  // confirm before adding an fclose.
+}
+
+// Initialises the class-wide in-memory default from the "minlexr-memory"
+// parameter; false is used when the parameter is absent.
+void LexicalReorderingTableCompact::SetStaticDefaultParameters(Parameter const& param)
+{
+  param.SetParameter(s_inMemoryByDefault, "minlexr-memory", false);
+}
+
+
+}
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce4f5b10e9cd88e503ff902ee593e7f16ef19fe8
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h
@@ -0,0 +1,94 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_LexicalReorderingTableCompact_h
+#define moses_LexicalReorderingTableCompact_h
+
+#include "moses/FF/LexicalReordering/LexicalReorderingTable.h"
+#include "moses/StaticData.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/GenerationDictionary.h"
+#include "moses/TargetPhrase.h"
+#include "moses/TargetPhraseCollection.h"
+
+#include "BlockHashIndex.h"
+#include "CanonicalHuffman.h"
+#include "StringVector.h"
+
+namespace Moses
+{
+
+// Lexical reordering table backed by the compact ".minlexr" binary format:
+// a hash index maps a phrase-pair key to a Huffman-compressed score string.
+class LexicalReorderingTableCompact:
+  public LexicalReorderingTable
+{
+private:
+  // Default for m_inMemory of newly constructed tables; set by
+  // SetStaticDefaultParameters.
+  static bool s_inMemoryByDefault;
+  // true: scores are held in m_scoresMemory; false: in m_scoresMapped.
+  bool m_inMemory;
+
+  // Number of scores stored per entry (read from the file by Load).
+  size_t m_numScoreComponent;
+  // true: one Huffman tree per score component; false: a single shared tree.
+  bool m_multipleScoreTrees;
+
+  // Maps a key built by MakeKey to the index of its score string.
+  BlockHashIndex m_hash;
+
+  typedef CanonicalHuffman<float> ScoreTree;
+  // Owned decoders; deleted in the destructor.
+  std::vector<ScoreTree*> m_scoreTrees;
+
+  // Only the store selected by m_inMemory is loaded.
+  StringVector<unsigned char, unsigned long, MmapAllocator>  m_scoresMapped;
+  StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
+
+  // Key construction: phrases are rendered with their factor lists, trimmed
+  // and joined with " ||| ".
+  std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
+  std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
+
+public:
+  // Constructs the table and loads `filePath` right away.
+  LexicalReorderingTableCompact(const std::string& filePath,
+                                const std::vector<FactorType>& f_factors,
+                                const std::vector<FactorType>& e_factors,
+                                const std::vector<FactorType>& c_factors);
+
+  // Constructs an empty table; call Load() separately.
+  LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
+                                const std::vector<FactorType>& e_factors,
+                                const std::vector<FactorType>& c_factors);
+
+  virtual
+  ~LexicalReorderingTableCompact();
+
+  // Returns the decoded scores for (f, e, c), or an empty vector if the key
+  // is not in the hash.
+  virtual
+  std::vector<float>
+  GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+
+  // Returns a new table if a ".minlexr" file exists for `filePath` (given
+  // with or without the suffix), otherwise 0. Caller owns the result.
+  static
+  LexicalReorderingTable*
+  CheckAndLoad(const std::string& filePath,
+               const std::vector<FactorType>& f_factors,
+               const std::vector<FactorType>& e_factors,
+               const std::vector<FactorType>& c_factors);
+
+  // Reads hash index, score layout, Huffman trees and scores from `filePath`.
+  void
+  Load(std::string filePath);
+
+  // Reads the "minlexr-memory" parameter into s_inMemoryByDefault.
+  static void
+  SetStaticDefaultParameters(Parameter const& param);
+
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.h b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.h
new file mode 100644
index 0000000000000000000000000000000000000000..1bf8444fe8d62de9d57ab7570c319d7279004aa8
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/LexicalReorderingTableCreator.h
@@ -0,0 +1,143 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_LexicalReorderingTableCreator_h
+#define moses_LexicalReorderingTableCreator_h
+
+#include "PhraseTableCreator.h"
+
+namespace Moses
+{
+
+// Builds a compact lexical reordering table. Only the declaration is visible
+// here; judging by the private steps below the pipeline is presumably
+// EncodeScores -> CalcHuffmanCodes -> CompressScores -> Save - TODO confirm
+// against the implementation file.
+class LexicalReorderingTableCreator
+{
+private:
+  std::string m_inPath;
+  std::string m_outPath;
+  std::string m_tempfilePath;
+
+  std::FILE* m_outFile;
+
+  // Hash index parameters (constructor defaults: 10 and 16).
+  size_t m_orderBits;
+  size_t m_fingerPrintBits;
+
+  size_t m_numScoreComponent;
+
+  bool m_multipleScoreTrees;
+  // NOTE(review): stored as bool although the constructor takes
+  // `size_t quantize` - confirm the intended conversion.
+  bool m_quantize;
+
+  std::string m_separator;
+
+  BlockHashIndex m_hash;
+
+  typedef Counter<float> ScoreCounter;
+  typedef CanonicalHuffman<float> ScoreTree;
+
+  // Raw pointers; ownership and cleanup are handled outside this header
+  // (presumably in the destructor).
+  std::vector<ScoreCounter*> m_scoreCounters;
+  std::vector<ScoreTree*> m_scoreTrees;
+
+  StringVector<unsigned char, unsigned long, MmapAllocator>* m_encodedScores;
+  StringVector<unsigned char, unsigned long, MmapAllocator>* m_compressedScores;
+
+  // State used by the Add*/Flush* methods below to hand results from the
+  // worker tasks back in order (presumably by line number - TODO confirm).
+  std::priority_queue<PackedItem> m_queue;
+  long m_lastFlushedLine;
+  long m_lastFlushedSourceNum;
+  std::string m_lastFlushedSourcePhrase;
+  std::vector<std::string> m_lastRange;
+
+#ifdef WITH_THREADS
+  size_t m_threads;
+#endif
+
+  void PrintInfo();
+
+  void EncodeScores();
+  void CalcHuffmanCodes();
+  void CompressScores();
+  void Save();
+
+  std::string MakeSourceTargetKey(std::string&, std::string&);
+
+  std::string EncodeLine(std::vector<std::string>& tokens);
+  void AddEncodedLine(PackedItem& pi);
+  void FlushEncodedQueue(bool force = false);
+
+  std::string CompressEncodedScores(std::string &encodedScores);
+  void AddCompressedScores(PackedItem& pi);
+  void FlushCompressedQueue(bool force = false);
+
+public:
+  // inPath / outPath / tempfilePath: input table, output file and scratch
+  // location. The `threads` parameter only exists in WITH_THREADS builds.
+  LexicalReorderingTableCreator(std::string inPath,
+                                std::string outPath,
+                                std::string tempfilePath,
+                                size_t orderBits = 10,
+                                size_t fingerPrintBits = 16,
+                                bool multipleScoreTrees = true,
+                                size_t quantize = 0
+#ifdef WITH_THREADS
+                                    , size_t threads = 2
+#endif
+                               );
+
+  ~LexicalReorderingTableCreator();
+
+  // The worker tasks call the private Add*/Flush* methods directly.
+  friend class EncodingTaskReordering;
+  friend class CompressionTaskReordering;
+};
+
+// Callable worker bound to an input stream and a creator; operator() is
+// defined elsewhere. The static members are shared by all task instances
+// and, in WITH_THREADS builds, come with two static mutexes (presumably
+// guarding the shared counters and the input file - TODO confirm).
+class EncodingTaskReordering
+{
+private:
+#ifdef WITH_THREADS
+  static boost::mutex m_mutex;
+  static boost::mutex m_fileMutex;
+#endif
+  static size_t m_lineNum;
+  static size_t m_sourcePhraseNum;
+  static std::string m_lastSourcePhrase;
+
+  // Both are held by reference and must outlive the task.
+  InputFileStream& m_inFile;
+  LexicalReorderingTableCreator& m_creator;
+
+public:
+  EncodingTaskReordering(InputFileStream& inFile, LexicalReorderingTableCreator& creator);
+  void operator()();
+};
+
+// Callable worker bound to the encoded score vector and a creator;
+// operator() is defined elsewhere. m_scoresNum is shared by all instances.
+class CompressionTaskReordering
+{
+private:
+#ifdef WITH_THREADS
+  static boost::mutex m_mutex;
+#endif
+  static size_t m_scoresNum;
+  // Both are held by reference and must outlive the task.
+  StringVector<unsigned char, unsigned long, MmapAllocator> &m_encodedScores;
+  LexicalReorderingTableCreator &m_creator;
+
+public:
+  // NOTE(review): the first parameter is named like the member
+  // (m_encodedScores); harmless in a declaration but easy to misread.
+  CompressionTaskReordering(StringVector<unsigned char, unsigned long, MmapAllocator>&
+                            m_encodedScores, LexicalReorderingTableCreator& creator);
+  void operator()();
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/ListCoders.h b/mosesdecoder/moses/TranslationModel/CompactPT/ListCoders.h
new file mode 100644
index 0000000000000000000000000000000000000000..b78dbdd8a603b1f8f786ce6ceee32c86fb405431
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/ListCoders.h
@@ -0,0 +1,387 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_ListCoders_h
+#define moses_ListCoders_h
+
+#include <cmath>
+#include <cassert>
+
+namespace Moses
+{
+
+// Variable-length integer coder working on symbols of type T.
+//
+// Every symbol carries sizeof(T)*8-1 payload bits; the top bit is a
+// continuation flag (set = more symbols follow). Payload chunks are emitted
+// least-significant first. T is expected to be an unsigned integer type.
+template <typename T = unsigned int>
+class VarIntType
+{
+private:
+  // Payload bits per symbol (all bits of T except the continuation bit).
+  enum { kPayloadBits = sizeof(T)*8 - 1 };
+
+  // Continuation flag. T(1) keeps the shift in T's (promoted) type instead of
+  // shifting a plain int, which is undefined for T wider than int.
+  static T ContinuationBit() {
+    return static_cast<T>(T(1) << kPayloadBits);
+  }
+
+  // Writes `input` as one or more symbols through `output`.
+  // The iterator is taken by reference so that the caller's position
+  // advances; by value, Encode() kept overwriting the same slot whenever
+  // OutIt was not an insert iterator.
+  // `input` is expected to be non-negative.
+  template <typename IntType, typename OutIt>
+  static void EncodeSymbol(IntType input, OutIt &output) {
+    const T msb = ContinuationBit();
+    const T payloadMask = static_cast<T>(~msb);
+
+    // do/while: a zero input still produces exactly one (zero) symbol.
+    do {
+      T res = static_cast<T>(static_cast<T>(input) & payloadMask);
+      input >>= kPayloadBits;
+      if(input)
+        res |= msb;
+      *output = res;
+      ++output;
+    } while(input);
+  }
+
+  // Reads one value starting at `it`, leaving `it` just behind it.
+  // Symbols are converted to T before masking so that iterators yielding
+  // signed char do not sign-extend into the result.
+  // Running out of input before the final symbol is a programming error: it
+  // asserts in debug builds and otherwise returns the bits read so far
+  // without dereferencing `end`.
+  template <typename InIt, typename IntType>
+  static void DecodeSymbol(InIt &it, InIt end, IntType &output) {
+    const T msb = ContinuationBit();
+    const T payloadMask = static_cast<T>(~msb);
+
+    output = 0;
+    size_t i = 0;
+    while(it != end) {
+      const T symbol = static_cast<T>(*it);
+      ++it;
+
+      // Chunks that would land beyond IntType's width are dropped rather
+      // than shifted by an out-of-range amount.
+      const size_t bitOffset = kPayloadBits * i;
+      if(bitOffset < sizeof(IntType)*8)
+        output |= static_cast<IntType>(symbol & payloadMask) << bitOffset;
+
+      if(!(symbol & msb))
+        return;
+      ++i;
+    }
+    assert(!"VarIntType: input ended inside a value");
+  }
+
+public:
+
+  // Encodes every value in [it, end) and writes the symbols through outIt.
+  template <typename InIt, typename OutIt>
+  static void Encode(InIt it, InIt end, OutIt outIt) {
+    for(; it != end; ++it)
+      EncodeSymbol(*it, outIt);
+  }
+
+  // Decodes all values in [it, end) into outIt; `it` ends up at `end`.
+  template <typename InIt, typename OutIt>
+  static void Decode(InIt &it, InIt end, OutIt outIt) {
+    while(it != end) {
+      size_t output;
+      DecodeSymbol(it, end, output);
+      *outIt = output;
+      outIt++;
+    }
+  }
+
+  // Decodes at most `num` values (fewer if the input ends first) and returns
+  // their sum; `it` is left behind the last value consumed.
+  template <typename InIt>
+  static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
+    size_t sum = 0;
+    size_t curr = 0;
+
+    while(it != end && curr < num) {
+      size_t output;
+      DecodeSymbol(it, end, output);
+      sum += output;
+      curr++;
+    }
+
+    return sum;
+  }
+
+};
+
+// Byte-oriented variable-length coder: 7 payload bits per byte.
+typedef VarIntType<unsigned char> VarByte;
+
+// Aliases named after the symbol width in bits.
+typedef VarByte VarInt8;
+typedef VarIntType<unsigned short> VarInt16;
+typedef VarIntType<unsigned int>   VarInt32;
+
+// Simple9 coder: packs several small unsigned integers into one 32-bit word.
+//
+// The top 4 bits of a word hold a selector (1..9); the remaining 28 bits hold
+// 1, 2, 3, 4, 5, 7, 9, 14 or 28 equally wide values of 28, 14, 9, 7, 5, 4, 3,
+// 2 or 1 bits respectively. Values above 2^28-1 cannot be encoded.
+//
+// UTIL_THROW_IF2 is used but not included by this header; the including
+// translation unit has to provide it.
+class Simple9
+{
+private:
+  typedef unsigned int uint;
+
+  // Number of bits needed to store `value`; 0 still occupies one bit.
+  // Integer arithmetic replaces the former ceil(log(v+1)/log(2)), which
+  // yielded 0 for v == 0 (followed by a division by zero) and could round up
+  // by one bit because of floating-point error.
+  static inline uint RequiredBits(uint value) {
+    uint bits = 1;
+    while(value >>= 1)
+      ++bits;
+    return bits;
+  }
+
+  // Describes how the payload of a word with selector `type` is split:
+  // width of one value, shift of the first (left-most) value, and value mask.
+  // Selector 0 and selectors above 9 are never produced by the encoder; they
+  // map to an all-zero layout, so decoding such a word yields a single 0.
+  static inline void GetLayout(uint type, uint &bitlen, uint &shift, uint &mask) {
+    static const uint kBitLen[10] = { 0, 28, 14, 9, 7, 5, 4, 3, 2, 1 };
+    static const uint kCount[10]  = { 0, 1, 2, 3, 4, 5, 7, 9, 14, 28 };
+
+    if(type == 0 || type > 9) {
+      bitlen = 0;
+      shift = 0;
+      mask = 0;
+      return;
+    }
+    bitlen = kBitLen[type];
+    shift  = bitlen * (kCount[type] - 1);
+    mask   = (1u << bitlen) - 1;
+  }
+
+  // Packs the values in [it, end) into `output`. The number of values must be
+  // one of the nine supported group sizes; the first value ends up in the
+  // most significant slot. Throws if a value exceeds 2^28-1.
+  template <typename InIt>
+  inline static void EncodeSymbol(uint &output, InIt it, InIt end) {
+    uint length = end - it;
+
+    uint type = 0;
+    uint bitlength = 0;
+
+    switch(length) {
+    case 1:  type = 1; bitlength = 28; break;
+    case 2:  type = 2; bitlength = 14; break;
+    case 3:  type = 3; bitlength = 9;  break;
+    case 4:  type = 4; bitlength = 7;  break;
+    case 5:  type = 5; bitlength = 5;  break;
+    case 7:  type = 6; bitlength = 4;  break;
+    case 9:  type = 7; bitlength = 3;  break;
+    case 14: type = 8; bitlength = 2;  break;
+    case 28: type = 9; bitlength = 1;  break;
+    }
+
+    output = 0;
+    output |= (type << 28);
+
+    uint i = 0;
+    while(it != end) {
+      UTIL_THROW_IF2(*it > 268435455, "You are trying to encode " << *it
+                     << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
+
+      uint l = bitlength * (length-i-1);
+      output |= *it << l;
+      it++;
+      i++;
+    }
+  }
+
+  // Unpacks every value stored in `input` through `outIt`.
+  // The iterator is taken by reference so the caller's position advances;
+  // by value, Decode() kept restarting at the same slot for iterators that
+  // are not insert iterators.
+  template <typename OutIt>
+  static inline void DecodeSymbol(uint input, OutIt &outIt) {
+    uint bitlen = 0;
+    uint shift = 0;
+    uint mask = 0;
+    GetLayout(input >> 28, bitlen, shift, mask);
+
+    while(shift > 0) {
+      *outIt = (input >> shift) & mask;
+      shift -= bitlen;
+      outIt++;
+    }
+    *outIt = input & mask;
+    outIt++;
+  }
+
+  // Adds up the values stored in `input`, stopping early once `curr` (the
+  // running count of values consumed so far, updated in place) reaches `num`.
+  static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) {
+    uint bitlen = 0;
+    uint shift = 0;
+    uint mask = 0;
+    GetLayout(input >> 28, bitlen, shift, mask);
+
+    size_t sum = 0;
+    while(shift > 0) {
+      sum += (input >> shift) & mask;
+      shift -= bitlen;
+      if(++curr == num)
+        return sum;
+    }
+    sum += input & mask;
+    curr++;
+    return sum;
+  }
+
+public:
+  // Encodes [it, end) into 32-bit words written through outIt. For each word
+  // the longest prefix of the remaining input that fits one of the nine
+  // layouts is chosen. InIt must be a random-access iterator.
+  // Throws (UTIL_THROW_IF2) if a value exceeds 2^28-1.
+  template <typename InIt, typename OutIt>
+  static void Encode(InIt it, InIt end, OutIt outIt) {
+    uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };
+
+    uint buffer[28];
+    for(InIt i = it; i < end; i++) {
+      uint lastbit = 1;   // widest slot class required so far
+      uint lastpos = 0;   // number of values looked at for this word
+      uint lastyes = 0;   // index of the last value of the best complete group
+      uint j = 0;         // index into parts[]: group size being attempted
+
+      while(j < 9 && lastpos < 28 && (i+lastpos) < end) {
+        if(lastpos >= parts[j])
+          j++;
+
+        buffer[lastpos] = *(i + lastpos);
+
+        // Reject oversized values before they reach the division below
+        // (28/reqbit would be 0 for reqbit > 28).
+        UTIL_THROW_IF2(buffer[lastpos] > 268435455, "You are trying to encode " << buffer[lastpos]
+                       << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
+
+        uint reqbit = RequiredBits(buffer[lastpos]);
+        assert(reqbit <= 28);
+
+        // Round the required width up to the nearest slot width
+        // (1, 2, 3, 4, 5, 7, 9, 14 or 28).
+        uint bit = 28 / (28 / reqbit);
+        if(lastbit < bit)
+          lastbit = bit;
+
+        if(parts[j] > 28/lastbit)
+          break;
+        else if(lastpos == parts[j]-1)
+          lastyes = lastpos;
+
+        lastpos++;
+      }
+      i += lastyes;
+
+      uint length = lastyes + 1;
+      uint output;
+      EncodeSymbol(output, buffer, buffer + length);
+
+      *outIt = output;
+      outIt++;
+    }
+  }
+
+  // Decodes every word in [it, end) into outIt; `it` ends up at `end`.
+  template <typename InIt, typename OutIt>
+  static void Decode(InIt &it, InIt end, OutIt outIt) {
+    while(it != end) {
+      DecodeSymbol(*it, outIt);
+      it++;
+    }
+  }
+
+  // Returns the sum of the first `num` encoded values. `it` is advanced past
+  // every word that was touched. Asserts that `num` values were available.
+  template <typename InIt>
+  static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
+    size_t sum = 0;
+    size_t curr = 0;
+    while(it != end && curr < num) {
+      sum += DecodeAndSumSymbol(*it, num, curr);
+      it++;
+    }
+    assert(curr == num);
+    return sum;
+  }
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/PackedArray.h b/mosesdecoder/moses/TranslationModel/CompactPT/PackedArray.h
new file mode 100644
index 0000000000000000000000000000000000000000..479c2cc79cde5082b290dd765b28fe3f9d42abfc
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/PackedArray.h
@@ -0,0 +1,187 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_PackedArray_h
+#define moses_PackedArray_h
+
+#include <vector>
+#include <cmath>
+#include <cstring>
+#include <cstdio>
+
+#include "ThrowingFwrite.h"
+
+namespace Moses
+{
+
+/**
+ * Array of unsigned integers stored back to back using a caller-chosen
+ * number of bits per element.
+ *
+ * T is the element type passed to Set and returned by Get, D is the storage
+ * unit. The bit width is not stored in the object: callers must pass the
+ * same `bits` value to the sizing constructor, Set and Get.
+ */
+template <typename T = size_t, typename D = unsigned char>
+class PackedArray
+{
+protected:
+  static size_t m_dataBits;
+
+  size_t m_size;
+  size_t m_storageSize;
+  D* m_storage;
+
+  // Returns a U with its n lowest bits set. n >= width of U is handled
+  // explicitly because shifting by the full width is undefined behaviour.
+  template <typename U>
+  static U LowBits(size_t n) {
+    if(n >= sizeof(U) * 8)
+      return static_cast<U>(~U(0));
+    return static_cast<U>((U(1) << n) - 1);
+  }
+
+public:
+  // Creates an empty array.
+  PackedArray() {
+    m_size = 0;
+    m_storageSize = 0;
+    m_storage = new D[0];
+  }
+
+  // Creates an array of `size` elements of `bits` bits each. The storage is
+  // zero-initialised so unused padding bits are deterministic when saved.
+  PackedArray(size_t size, size_t bits) : m_size(size) {
+    // Integer ceiling division; going through float loses precision once
+    // bits * size exceeds 2^24 and could under-allocate.
+    m_storageSize = (bits * size + m_dataBits - 1) / m_dataBits;
+    m_storage = new D[m_storageSize]();
+  }
+
+  PackedArray(const PackedArray<T, D> &c) {
+    m_size = c.m_size;
+
+    m_storageSize = c.m_storageSize;
+    m_storage = new D[m_storageSize];
+
+    std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D));
+  }
+
+  // Deep-copying assignment; without it the compiler-generated version
+  // would share m_storage between two objects and free it twice.
+  PackedArray<T, D>& operator=(const PackedArray<T, D> &c) {
+    if(this != &c) {
+      D* fresh = new D[c.m_storageSize];
+      std::memcpy(fresh, c.m_storage, c.m_storageSize * sizeof(D));
+
+      delete [] m_storage;
+      m_storage = fresh;
+      m_storageSize = c.m_storageSize;
+      m_size = c.m_size;
+    }
+    return *this;
+  }
+
+  virtual ~PackedArray() {
+    delete [] m_storage;
+    m_size = 0;
+    m_storageSize = 0;
+    m_storage = 0;
+  }
+
+  // Returns element i, interpreting the storage as consecutive fields of
+  // `bits` bits.
+  T Get(size_t i, size_t bits) const {
+    T out = 0;
+
+    size_t bitstart = (i * bits);
+    size_t bitpos = bitstart;
+
+    while(bitpos - bitstart < bits) {
+      size_t pos = bitpos / m_dataBits;
+      size_t off = bitpos % m_dataBits;
+
+      // First unit: drop the `off` bits below the field. Later units start
+      // at off == 0 and are shifted up to their place in the result.
+      out |= (T(m_storage[pos]) << (bitpos - bitstart)) >> off;
+
+      bitpos += (m_dataBits - off);
+    }
+
+    // Strip the bits of the following element that were read along.
+    out &= LowBits<T>(bits);
+    return out;
+  }
+
+  // Stores the lowest `bits` bits of v as element i. Bits of v above
+  // `bits` are ignored so they cannot leak into neighbouring elements.
+  void Set(size_t i, T v, size_t bits) {
+    size_t bitstart = (i * bits);
+    size_t bitpos = bitstart;
+
+    while(bitpos - bitstart < bits) {
+      size_t pos = bitpos / m_dataBits;
+      size_t off = bitpos % m_dataBits;
+
+      // The field occupies bits [off, stop) of this storage unit.
+      size_t rest = bits - (bitpos - bitstart);
+      size_t stop = (rest < m_dataBits - off) ? off + rest : m_dataBits;
+      D keep = static_cast<D>(~LowBits<D>(stop) | LowBits<D>(off));
+
+      m_storage[pos] &= keep;
+      m_storage[pos] |= static_cast<D>(static_cast<D>(v << off) & ~keep);
+
+      // Drop the bits just written; guard against a full-width shift.
+      size_t step = m_dataBits - off;
+      v = (step >= sizeof(T) * 8) ? T(0) : T(v >> step);
+      bitpos += step;
+    }
+  }
+
+  virtual D*& GetStorage() {
+    return m_storage;
+  }
+
+  virtual size_t GetStorageSize() const {
+    return m_storageSize;
+  }
+
+  virtual size_t Size() const {
+    return m_size;
+  }
+
+  // Replaces the contents with data read from `in` (element count, storage
+  // size, raw storage) and returns the number of bytes the file position
+  // advanced. If the two header fields cannot be read the array becomes
+  // empty; a short read of the payload leaves the remainder zero-filled.
+  // Callers can detect both cases from the returned byte count.
+  virtual size_t Load(std::FILE* in) {
+    size_t a1 = std::ftell(in);
+
+    size_t size = 0;
+    size_t storageSize = 0;
+    bool headerOk = std::fread(&size, sizeof(size), 1, in) == 1
+                    && std::fread(&storageSize, sizeof(storageSize), 1, in) == 1;
+    if(!headerOk) {
+      size = 0;
+      storageSize = 0;
+    }
+
+    // Allocate before releasing the old buffer so that a failed allocation
+    // leaves the object in its previous, valid state.
+    D* fresh = new D[storageSize]();
+    if(storageSize) {
+      size_t got = std::fread(fresh, sizeof(D), storageSize, in);
+      (void) got;
+    }
+
+    delete [] m_storage;
+    m_storage = fresh;
+    m_storageSize = storageSize;
+    m_size = size;
+
+    size_t a2 = std::ftell(in);
+    return a2 - a1;
+  }
+
+  // Writes element count, storage size and raw storage to `out` and
+  // returns the number of bytes the file position advanced.
+  virtual size_t Save(std::FILE* out) {
+    size_t a1 = std::ftell(out);
+
+    ThrowingFwrite(&m_size, sizeof(m_size), 1, out);
+    ThrowingFwrite(&m_storageSize, sizeof(m_storageSize), 1, out);
+    ThrowingFwrite(m_storage, sizeof(D), m_storageSize, out);
+
+    size_t a2 = std::ftell(out);
+    return a2 - a1;
+  }
+
+};
+
+// Number of bits in one storage unit D (assumes 8-bit bytes).
+template <typename T, typename D>
+size_t PackedArray<T, D>::m_dataBits = sizeof(D)*8;
+
+/**************************************************************************/
+
+/**
+ * PackedArray whose elements are pairs: the first member occupies the low
+ * `bits1` bits of an element and the second member the `bits2` bits above.
+ */
+template <typename T = size_t, typename D = unsigned char>
+class PairedPackedArray : public PackedArray<T,D>
+{
+public:
+  PairedPackedArray() : PackedArray<T,D>() {}
+
+  PairedPackedArray(size_t size, size_t bits1, size_t bits2)
+    : PackedArray<T, D>(size, bits1 + bits2) { }
+
+  // Stores (a, b) as element i; a goes into the low bits1 bits, b above it.
+  void Set(size_t i, T a, T b, size_t bits1, size_t bits2) {
+    // Mask a so that an oversized value cannot overwrite b's bits.
+    T c = (a & LowMask(bits1)) | ShiftUp(b, bits1);
+    PackedArray<T,D>::Set(i, c, bits1 + bits2);
+  }
+
+  // Pair overload; p.first / p.second use the same layout as Get returns.
+  void Set(size_t i, std::pair<T,T> p, size_t bits1, size_t bits2) {
+    Set(i, p.first, p.second, bits1, bits2);
+  }
+
+  // Returns element i as (low bits1 bits, remaining high bits).
+  std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2) const {
+    T v = PackedArray<T, D>::Get(i, bits1 + bits2);
+    T a = v & LowMask(bits1);
+    T b = (bits1 >= sizeof(T) * 8) ? T(0) : T(v >> bits1);
+    return std::pair<T, T>(a, b);
+  }
+
+private:
+  // Mask with the n lowest bits set, computed in T (not int) and safe for
+  // n equal to the full width of T.
+  static T LowMask(size_t n) {
+    if(n >= sizeof(T) * 8)
+      return static_cast<T>(~T(0));
+    return static_cast<T>((T(1) << n) - 1);
+  }
+
+  // v << n without undefined behaviour when n reaches the width of T.
+  static T ShiftUp(T v, size_t n) {
+    return (n >= sizeof(T) * 8) ? T(0) : T(v << n);
+  }
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/mosesdecoder/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d93613b8fdf8d4cf8e722e137d3241e5e4fde755
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -0,0 +1,194 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <queue>
+#include <algorithm>
+#include <sys/stat.h>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/thread/tss.hpp>
+
+#include "PhraseDictionaryCompact.h"
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/Range.h"
+#include "moses/ThreadPool.h"
+#include "util/exception.hh"
+
+using namespace std;
+using namespace boost::algorithm;
+
+namespace Moses
+{
+
+// Definition of the static cache of phrase collections handed out while a
+// sentence is processed; it is filled by CacheForCleanup and emptied by
+// CleanUpAfterSentenceProcessing.
+// NOTE(review): SentenceCache is presumably a boost::thread_specific_ptr
+// (it is used through get()/reset()) - confirm in the header.
+PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
+
+// Builds the feature from its configuration line. The in-memory flag starts
+// from the static default, alignment info is enabled, and no phrase decoder
+// exists until Load() creates one. ReadParameters() is then invoked.
+PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
+  : PhraseDictionary(line, true),
+    m_inMemory(s_inMemoryByDefault),
+    m_useAlignmentInfo(true),
+    m_hash(10, 16),
+    m_phraseDecoder(0)
+{
+  ReadParameters();
+}
+
+// Loads the compact phrase table "<m_filePath>.minphr" (the suffix is
+// appended when missing): first the source phrase index, then the phrase
+// decoder data, then the target phrase collections, which are either read
+// into memory or kept on disk depending on m_inMemory.
+//
+// Throws std::runtime_error if the file does not exist or cannot be
+// opened, and via UTIL_THROW_IF2 if any of the three sections reports a
+// size of zero.
+void PhraseDictionaryCompact::Load(AllOptions::ptr const& opts)
+{
+  m_options = opts;
+  const StaticData &staticData = StaticData::Instance();
+
+  SetFeaturesToApply();
+
+  std::string tFilePath = m_filePath;
+
+  std::string suffix = ".minphr";
+  if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
+  if (!FileExists(tFilePath))
+    throw runtime_error("Error: File " + tFilePath + " does not exist.");
+
+  m_phraseDecoder
+  = new PhraseDecoder(*this, &m_input, &m_output, m_numScoreComponents);
+
+  // The table is binary data, so open it in binary mode; "r" would apply
+  // text-mode translation on platforms that distinguish the two.
+  std::FILE* pFile = std::fopen(tFilePath.c_str() , "rb");
+  // The file may exist but still be unreadable (e.g. permissions); never
+  // pass a null FILE* on to the loaders.
+  if (!pFile)
+    throw runtime_error("Error: File " + tFilePath + " could not be opened.");
+
+  // NOTE(review): pFile is never closed here. In the on-disk mode the
+  // mapped storage presumably keeps using it; confirm who owns the handle
+  // before adding an fclose.
+
+  size_t indexSize;
+  //if(m_inMemory)
+  // Load source phrase index into memory
+  indexSize = m_hash.Load(pFile);
+  // else
+  // Keep source phrase index on disk
+  //indexSize = m_hash.LoadIndex(pFile);
+
+  size_t coderSize = m_phraseDecoder->Load(pFile);
+
+  size_t phraseSize;
+  if(m_inMemory)
+    // Load target phrase collections into memory
+    phraseSize = m_targetPhrasesMemory.load(pFile, false);
+  else
+    // Keep target phrase collections on disk
+    phraseSize = m_targetPhrasesMapped.load(pFile, true);
+
+  UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
+                 "Not successfully loaded");
+}
+
+// Looks up the translations of sourcePhrase, keeps the best m_tableLimit of
+// them (all of them when the limit is 0 or not reached) and returns them as
+// a new TargetPhraseCollection. The collection is also registered with
+// CacheForCleanup. Returns an empty pointer when nothing is found.
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryCompact::
+GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
+{
+  // A phrase longer than the longest source phrase observed during
+  // compilation cannot be in the table.
+  if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
+    return TargetPhraseCollection::shared_ptr();
+
+  // Retrieve the decoded target phrases from the phrase table.
+  TargetPhraseVectorPtr decoded
+  = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
+
+  if(!(decoded != NULL && decoded->size()))
+    return TargetPhraseCollection::shared_ptr();
+
+  // Work on a private copy because the partial sort below reorders it.
+  TargetPhraseVectorPtr working(new TargetPhraseVector(*decoded));
+  TargetPhraseCollection::shared_ptr result(new TargetPhraseCollection);
+
+  // Apply the table limit, if any, by moving the best entries to the front.
+  TargetPhraseVector::iterator cutoff = working->end();
+  if(m_tableLimit != 0 && !(working->size() < m_tableLimit))
+    cutoff = working->begin() + m_tableLimit;
+  NTH_ELEMENT4(working->begin(), cutoff, working->end(), CompareTargetPhrase());
+
+  for(TargetPhraseVector::iterator cur = working->begin(); cur != cutoff; cur++)
+    result->Add(new TargetPhrase(*cur));
+
+  // Cache phrase pair for clean-up or retrieval with PREnc
+  const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(result);
+
+  return result;
+}
+
+// Returns the decoded target phrases for sourcePhrase as produced by the
+// phrase decoder, without applying the table limit. Returns an empty
+// pointer for phrases longer than the longest source phrase observed during
+// compilation, since those cannot be in the table.
+TargetPhraseVectorPtr
+PhraseDictionaryCompact::
+GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
+{
+  if(!(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()))
+    return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
+
+  return TargetPhraseVectorPtr();
+}
+
+// Releases the phrase decoder created by Load(), if any.
+PhraseDictionaryCompact::
+~PhraseDictionaryCompact()
+{
+  // Deleting a null pointer is a no-op, so no explicit check is needed.
+  delete m_phraseDecoder;
+}
+
+// Appends tpc to the sentence cache, creating the cache on first use.
+void
+PhraseDictionaryCompact::
+CacheForCleanup(TargetPhraseCollection::shared_ptr  tpc)
+{
+  if(m_sentenceCache.get() == NULL) {
+    m_sentenceCache.reset(new PhraseCache());
+  }
+
+  m_sentenceCache->push_back(tpc);
+}
+
+// Intentionally empty: this implementation ignores both arguments and does
+// not add anything to the table.
+void
+PhraseDictionaryCompact::
+AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
+{ }
+
+// Per-sentence clean-up: prunes the phrase decoder's cache, empties the
+// sentence cache (creating it first if it does not exist yet) and finally
+// calls ReduceCache(). The source argument is not used.
+void
+PhraseDictionaryCompact::
+CleanUpAfterSentenceProcessing(const InputType &source)
+{
+  if(m_sentenceCache.get() == NULL) {
+    m_sentenceCache.reset(new PhraseCache());
+  }
+
+  m_phraseDecoder->PruneCache();
+  m_sentenceCache->clear();
+
+  ReduceCache();
+}
+
+// Default for m_inMemory of newly constructed dictionaries; false keeps the
+// target phrase collections on disk (see Load).
+bool PhraseDictionaryCompact::s_inMemoryByDefault = false;
+// Sets s_inMemoryByDefault from the "minphr-memory" parameter, passing
+// false as the fallback value to SetParameter.
+void
+PhraseDictionaryCompact::
+SetStaticDefaultParameters(Parameter const& param)
+{
+  param.SetParameter(s_inMemoryByDefault, "minphr-memory", false);
+}
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/StringVectorTemp.h b/mosesdecoder/moses/TranslationModel/CompactPT/StringVectorTemp.h
new file mode 100644
index 0000000000000000000000000000000000000000..ffac0b718f7bc55fea6d4445e04746599d4fc2df
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/StringVectorTemp.h
@@ -0,0 +1,430 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_StringVectorTemp_h
+#define moses_StringVectorTemp_h
+
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <iterator>
+#include <cstdio>
+#include <cassert>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include "ThrowingFwrite.h"
+#include "StringVector.h"
+
+#include "MmapAllocator.h"
+
+namespace Moses
+{
+
+
+// ********** StringVectorTemp **********
+
+/**
+ * Sequence of strings stored contiguously in one character array
+ * (m_charArray) together with the start offset of every string
+ * (m_positions). The length of a string is the distance to the next
+ * offset, or to the end of the character array for the last string.
+ *
+ * m_sorted records whether every pushed string compared greater than its
+ * predecessor; find() uses a binary search while that holds.
+ */
+template <typename ValueT = unsigned char, typename PosT = unsigned int,
+         template <typename> class Allocator = std::allocator>
+class StringVectorTemp
+{
+protected:
+  bool m_sorted;
+  bool m_memoryMapped;
+
+  // Owned; released in the destructor.
+  std::vector<ValueT, Allocator<ValueT> >* m_charArray;
+  std::vector<PosT> m_positions;
+
+  virtual const ValueT* value_ptr(PosT i) const;
+
+public:
+  //typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+  typedef ValueIteratorRange<const ValueT *> range;
+
+  // ********** RangeIterator **********
+
+  // Random-access iterator over the strings; dereferencing yields a `range`
+  // (by value) that points into the character array.
+  class RangeIterator : public boost::iterator_facade<RangeIterator,
+    range, std::random_access_iterator_tag, range, PosT>
+  {
+
+  private:
+    PosT m_index;
+    StringVectorTemp<ValueT, PosT, Allocator>* m_container;
+
+  public:
+    RangeIterator();
+    RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+    PosT get_index();
+
+  private:
+    friend class boost::iterator_core_access;
+
+    range dereference() const;
+    bool equal(RangeIterator const& other) const;
+    void increment();
+    void decrement();
+    void advance(PosT n);
+
+    PosT distance_to(RangeIterator const& other) const;
+  };
+
+  // ********** StringIterator **********
+
+  // Random-access iterator over the strings; dereferencing yields a copy of
+  // the string as std::string.
+  class StringIterator : public boost::iterator_facade<StringIterator,
+    std::string, std::random_access_iterator_tag, const std::string, PosT>
+  {
+
+  private:
+    PosT m_index;
+    StringVectorTemp<ValueT, PosT, Allocator>* m_container;
+
+  public:
+    StringIterator();
+    StringIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+    PosT get_index();
+
+  private:
+    friend class boost::iterator_core_access;
+
+    const std::string dereference() const;
+    bool equal(StringIterator const& other) const;
+    void increment();
+    void decrement();
+    void advance(PosT n);
+    PosT distance_to(StringIterator const& other) const;
+  };
+
+  typedef RangeIterator iterator;
+  typedef StringIterator string_iterator;
+
+  StringVectorTemp();
+  StringVectorTemp(Allocator<ValueT> alloc);
+
+  // Deep copy. The class owns m_charArray through a raw pointer, so the
+  // compiler-generated copy would make two objects delete the same vector.
+  StringVectorTemp(const StringVectorTemp<ValueT, PosT, Allocator> &c)
+    : m_sorted(c.m_sorted), m_memoryMapped(c.m_memoryMapped),
+      m_charArray(0), m_positions(c.m_positions) {
+    m_charArray = new std::vector<ValueT, Allocator<ValueT> >(*c.m_charArray);
+  }
+
+  // Deep-copying assignment (copy, then exchange ownership with the copy).
+  StringVectorTemp<ValueT, PosT, Allocator>&
+  operator=(const StringVectorTemp<ValueT, PosT, Allocator> &c) {
+    if(this != &c) {
+      StringVectorTemp<ValueT, PosT, Allocator> tmp(c);
+      std::swap(m_charArray, tmp.m_charArray);
+      m_positions.swap(tmp.m_positions);
+      m_sorted = tmp.m_sorted;
+      m_memoryMapped = tmp.m_memoryMapped;
+    }
+    return *this;
+  }
+
+  virtual ~StringVectorTemp() {
+    delete m_charArray;
+  }
+
+  // Exchanges the complete state (strings, offsets and both flags) with c.
+  void swap(StringVectorTemp<ValueT, PosT, Allocator> &c) {
+    m_positions.swap(c.m_positions);
+    m_charArray->swap(*c.m_charArray);
+
+    std::swap(m_sorted, c.m_sorted);
+    std::swap(m_memoryMapped, c.m_memoryMapped);
+  }
+
+  bool is_sorted() const;
+  // Number of strings.
+  PosT size() const;
+  // Total number of stored characters.
+  virtual PosT size2() const;
+
+  template<class Iterator> Iterator begin() const;
+  template<class Iterator> Iterator end() const;
+
+  iterator begin() const;
+  iterator end() const;
+
+  // Length of string i, and pointers to its first / one-past-last character.
+  PosT length(PosT i) const;
+  //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
+  //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+  const ValueT* begin(PosT i) const;
+  const ValueT* end(PosT i) const;
+
+  // Removes all strings; an empty vector counts as sorted.
+  void clear() {
+    m_charArray->clear();
+    m_sorted = true;
+    m_positions.clear();
+  }
+
+  // Element access; no bounds checking is performed.
+  range at(PosT i) const;
+  range operator[](PosT i) const;
+  range back() const;
+
+  template <typename StringT>
+  void push_back(StringT s);
+  void push_back(const char* c);
+
+  template <typename StringT>
+  PosT find(StringT &s) const;
+  PosT find(const char* c) const;
+};
+
+// ********** Implementation **********
+
+// StringVectorTemp
+
+// Creates an empty vector: sorted, not memory mapped, with a freshly
+// allocated character array using a default-constructed allocator.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp()
+  : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >()) { }
+
+// Creates an empty vector whose character array uses the given allocator.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp(Allocator<ValueT> alloc)
+  : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc)) { }
+
+// Appends s (anything offering begin()/end()). The sorted flag is cleared
+// as soon as a string is not strictly greater than the previous last one,
+// so duplicates also count as unsorted.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+void StringVectorTemp<ValueT, PosT, Allocator>::push_back(StringT s)
+{
+  // back() is only valid on a non-empty vector, hence the nested check.
+  if(is_sorted() && size()) {
+    if(!(back() < s))
+      m_sorted = false;
+  }
+
+  // The new string starts where the character array currently ends.
+  m_positions.push_back(size2());
+  std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray));
+}
+
+// Appends a NUL-terminated C string by forwarding it as std::string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::push_back(const char* c)
+{
+  push_back(std::string(c));
+}
+
+// Returns an iterator of the requested type positioned at the first string.
+// The iterators store a non-const container pointer, hence the const_cast.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
+{
+  StringVectorTemp<ValueT, PosT, Allocator>& self
+    = const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this);
+  return Iterator(self, 0);
+}
+
+// Returns an iterator of the requested type positioned one past the last
+// string (index size()).
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
+{
+  StringVectorTemp<ValueT, PosT, Allocator>& self
+    = const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this);
+  return Iterator(self, size());
+}
+
+// Range iterator to the first string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
+{
+  return this->template begin<iterator>();
+}
+
+// Range iterator one past the last string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
+{
+  return this->template end<iterator>();
+}
+
+// True while every pushed string compared greater than its predecessor.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::is_sorted() const
+{
+  return m_sorted;
+}
+
+// Number of stored strings, converted to PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::size() const
+{
+  return static_cast<PosT>(m_positions.size());
+}
+
+// Total number of characters over all strings, converted to PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::size2() const
+{
+  return static_cast<PosT>(m_charArray->size());
+}
+
+// Returns string i as a [first, last) pointer range into the character
+// array. No bounds check is performed.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::at(PosT i) const
+{
+  const ValueT* first = begin(i);
+  const ValueT* last = end(i);
+  return range(first, last);
+}
+
+// Same as at(i): string i as a pointer range, without bounds checking.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::operator[](PosT i) const
+{
+  return range(begin(i), end(i));
+}
+
+// Returns the last string. Must not be called on an empty vector: the
+// index size()-1 is computed without any check.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::back() const
+{
+  const PosT lastIndex = size()-1;
+  return at(lastIndex);
+}
+
+// Length of string i: the distance from its start offset to the start of
+// the next string, or to the end of the character array for the last one.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::length(PosT i) const
+{
+  const PosT stop = (i+1 < size()) ? m_positions[i+1] : size2();
+  return stop - m_positions[i];
+}
+
+// Returns a pointer to the first character of string i.
+//
+// The address is formed from the start of the character array plus the
+// offset instead of indexing with the offset directly: for an empty string
+// at the very end the offset equals the array size, and operator[] with
+// that index is out of range (hardened standard libraries abort on it).
+// For an empty character array a null pointer is returned; every string
+// then has length 0, so the resulting range is empty.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::value_ptr(PosT i) const
+{
+  if(m_charArray->empty())
+    return 0;
+  return &(*m_charArray)[0] + m_positions[i];
+}
+
+// Pointer to the first character of string i (delegates to value_ptr).
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::begin(PosT i) const
+{
+  return value_ptr(i);
+}
+
+// Pointer one past the last character of string i.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::end(PosT i) const
+{
+  const ValueT* first = value_ptr(i);
+  return first + length(i);
+}
+
+// Returns the index of s.
+//
+// Unsorted vector: linear search; size() is returned when s is absent.
+// Sorted vector: std::lower_bound is used, i.e. the result is the index of
+// the first string that is not less than s. That index is only a match if
+// the string stored there equals s; callers have to check this themselves.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::find(StringT &s) const
+{
+  const iterator first = begin();
+  const iterator last = end();
+  const iterator hit = m_sorted ? std::lower_bound(first, last, s)
+                       : std::find(first, last, s);
+  return std::distance(first, hit);
+}
+
+// Convenience overload: looks up a NUL-terminated C string by converting it
+// to std::string and delegating to the generic find.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::find(const char* c) const
+{
+  std::string s(c);
+  return find(s);
+}
+
+// RangeIterator
+
+// Creates a detached iterator: index 0 and no container.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() : m_index(0), m_container(0) { }
+
+// Creates an iterator into sv positioned at string `index`.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index)
+  : m_index(index), m_container(&sv) { }
+
+// Index of the string this iterator currently points to.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::get_index()
+{
+  return m_index;
+}
+
+// Returns the current string as a pointer range into the container's
+// character array.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVectorTemp<ValueT, PosT, Allocator>::range
+StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::dereference() const
+{
+  const ValueT* first = m_container->begin(m_index);
+  const ValueT* last = m_container->end(m_index);
+  return typename StringVectorTemp<ValueT, PosT, Allocator>::range(first, last);
+}
+
+// Two iterators are equal when they refer to the same container and index.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::equal(
+  RangeIterator const& other) const
+{
+  if(m_index != other.m_index)
+    return false;
+  return m_container == other.m_container;
+}
+
+// Moves to the next string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::increment()
+{
+  ++m_index;
+}
+
+// Moves to the previous string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::decrement()
+{
+  --m_index;
+}
+
+// Moves n strings forward. n has the index type PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
+{
+  m_index = static_cast<PosT>(m_index + n);
+}
+
+// Number of increments needed to reach `other`, computed in PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::distance_to(
+  RangeIterator const& other) const
+{
+  const PosT target = other.m_index;
+  return target - m_index;
+}
+
+// StringIterator
+
+// Creates a detached iterator: index 0 and no container.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator()
+  : m_index(0), m_container(0) { }
+
+// Creates an iterator into sv positioned at string `index`.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator(
+  StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index) : m_index(index),
+  m_container(&sv) { }
+
+// Index of the string this iterator currently points to.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::get_index()
+{
+  return m_index;
+}
+
+// Returns a std::string copy of the current string, built through the
+// range's str() member.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const std::string StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::dereference() const
+{
+  const ValueT* first = m_container->begin(m_index);
+  const ValueT* last = m_container->end(m_index);
+  return StringVectorTemp<ValueT, PosT, Allocator>::range(first, last).str();
+}
+
+// Two iterators are equal when they refer to the same container and index.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::equal(
+  StringIterator const& other) const
+{
+  if(m_index != other.m_index)
+    return false;
+  return m_container == other.m_container;
+}
+
+// Moves to the next string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::increment()
+{
+  ++m_index;
+}
+
+// Moves to the previous string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::decrement()
+{
+  --m_index;
+}
+
+// Moves n strings forward. n has the index type PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
+{
+  m_index = static_cast<PosT>(m_index + n);
+}
+
+// Number of increments needed to reach `other`, computed in PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::distance_to(
+  StringIterator const& other) const
+{
+  const PosT target = other.m_index;
+  return target - m_index;
+}
+
+// ********** Some typedefs **********
+
+// Byte strings with offsets stored as unsigned int.
+typedef StringVectorTemp<unsigned char, unsigned int> MediumStringVectorTemp;
+// Byte strings with offsets stored as unsigned long.
+typedef StringVectorTemp<unsigned char, unsigned long> LongStringVectorTemp;
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h b/mosesdecoder/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
new file mode 100644
index 0000000000000000000000000000000000000000..e017a3c1914397f84bcffb93240b5c3e1fc40e55
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.h
@@ -0,0 +1,163 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_TargetPhraseCollectionCache_h
+#define moses_TargetPhraseCollectionCache_h
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include <boost/thread/tss.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "moses/Phrase.h"
+#include "moses/TargetPhraseCollection.h"
+
+namespace Moses
+{
+
+// Avoid using new due to locking
+// Target phrases are held by value in a vector; the vector itself is
+// shared through a reference-counted pointer.
+typedef std::vector<TargetPhrase> TargetPhraseVector;
+typedef boost::shared_ptr<TargetPhraseVector> TargetPhraseVectorPtr;
+
+/**
+ * Implementation of Persistent Cache
+ *
+ * Maps source phrases to their target phrase vectors together with the
+ * clock() value of their last use. The map is a static thread-specific
+ * object that is created lazily on first access; m_max and m_tolerance
+ * control when Prune() evicts the least recently used entries.
+ **/
+class TargetPhraseCollectionCache
+{
+private:
+  size_t m_max;
+  float m_tolerance;
+
+  struct LastUsed {
+    clock_t m_clock;
+    TargetPhraseVectorPtr m_tpv;
+    size_t m_bitsLeft;
+
+    LastUsed() : m_clock(0), m_bitsLeft(0) {}
+
+    LastUsed(clock_t clock, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0)
+      : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {}
+  };
+
+  typedef std::map<Phrase, LastUsed> CacheMap;
+  static boost::thread_specific_ptr<CacheMap> m_phraseCache;
+
+  // Returns the calling thread's map, creating it on first use.
+  static CacheMap& GetCache() {
+    if(!m_phraseCache.get())
+      m_phraseCache.reset(new CacheMap());
+    return *m_phraseCache;
+  }
+
+public:
+
+  typedef CacheMap::iterator iterator;
+  typedef CacheMap::const_iterator const_iterator;
+
+  // max is the target number of entries; Prune() starts evicting above
+  // max * (1 + tolerance) and stops below max * (1 - tolerance).
+  TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2)
+    : m_max(max), m_tolerance(tolerance) {
+  }
+
+  iterator Begin() {
+    return GetCache().begin();
+  }
+
+  const_iterator Begin() const {
+    return GetCache().begin();
+  }
+
+  iterator End() {
+    return GetCache().end();
+  }
+
+  const_iterator End() const {
+    return GetCache().end();
+  }
+
+  /** store translations for source phrase in persistent cache
+   *
+   * An existing entry only gets its timestamp refreshed; its contents are
+   * left untouched. For a new entry at most maxRank target phrases are
+   * stored when maxRank is non-zero.
+   **/
+  void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv,
+             size_t bitsLeft = 0, size_t maxRank = 0) {
+    CacheMap &cache = GetCache();
+
+    // check if source phrase is already in cache
+    iterator it = cache.find(sourcePhrase);
+    if(it != cache.end()) {
+      // if found, just update clock
+      it->second.m_clock = clock();
+      return;
+    }
+
+    // else, add to cache; tpv is checked before use because a null vector
+    // cannot be truncated and is simply stored as is
+    if(maxRank && tpv && tpv->size() > maxRank) {
+      TargetPhraseVectorPtr truncated(
+        new TargetPhraseVector(tpv->begin(), tpv->begin() + maxRank));
+      cache[sourcePhrase] = LastUsed(clock(), truncated, bitsLeft);
+    } else
+      cache[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
+  }
+
+  /** retrieve translations for source phrase from persistent cache
+   *
+   * Returns the stored vector and its bitsLeft value and refreshes the
+   * entry's timestamp, or (null pointer, 0) if the phrase is not cached.
+   **/
+  std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase &sourcePhrase) {
+    CacheMap &cache = GetCache();
+    iterator it = cache.find(sourcePhrase);
+    if(it != cache.end()) {
+      LastUsed &lu = it->second;
+      lu.m_clock = clock();
+      return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
+    } else
+      return std::make_pair(TargetPhraseVectorPtr(), 0);
+  }
+
+  // if cache full, reduce
+  void Prune() {
+    CacheMap &cache = GetCache();
+    if(cache.size() > m_max * (1 + m_tolerance)) {
+      // Order all entries by last use (ties by phrase), oldest first.
+      typedef std::set<std::pair<clock_t, Phrase> > Cands;
+      Cands cands;
+      for(CacheMap::iterator it = cache.begin(); it != cache.end(); it++) {
+        LastUsed &lu = it->second;
+        cands.insert(std::make_pair(lu.m_clock, it->first));
+      }
+
+      // Evict from the oldest end until the lower bound is reached.
+      for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
+        const Phrase& p = it->second;
+        cache.erase(p);
+
+        if(cache.size() < (m_max * (1 - m_tolerance)))
+          break;
+      }
+    }
+  }
+
+  // Removes every entry from the calling thread's map.
+  void CleanUp() {
+    GetCache().clear();
+  }
+
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.cpp b/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..554c4ed7b599da6b089a6dfede35479158b20bf1
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.cpp
@@ -0,0 +1,30 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "ThrowingFwrite.h"
+
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream)
+{ // std::fwrite wrapper: throws via UTIL_THROW_IF2 unless all `count` items are written
+  assert(size); // debug-only guard against a zero item size
+  size_t returnValue = std::fwrite(ptr, size, count, stream); // number of items actually written
+  UTIL_THROW_IF2(count != returnValue, "Short fwrite; requested size " << size);
+  return returnValue; // equals count whenever this line is reached
+}
diff --git a/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.h b/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.h
new file mode 100644
index 0000000000000000000000000000000000000000..466d3973b33eb78dd9dd80854da5a229ceceb97e
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/CompactPT/ThrowingFwrite.h
@@ -0,0 +1,31 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_ThrowingFwrite_h
+#define moses_ThrowingFwrite_h
+
+#include <cassert>
+#include <cstdio>
+#include "util/exception.hh"
+
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream); // like std::fwrite, but a short write raises an exception (see ThrowingFwrite.cpp)
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/LoaderFactory.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderFactory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e7ec1d48dde290750903313bf0d84aa475265ea3
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderFactory.cpp
@@ -0,0 +1,65 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "LoaderFactory.h"
+
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "LoaderCompact.h"
+#include "LoaderHiero.h"
+#include "LoaderStandard.h"
+
+#include <sstream>
+#include <iostream>
+
+using namespace std;
+
+namespace Moses
+{
+
+// Determines the rule table type by peeking at the first line of the file, then
+// creates a suitable RuleTableLoader object: a single token "1" selects the compact
+// loader (any other single token is unsupported and yields a null pointer), a line
+// starting with "[X] |||" selects the Hiero loader, and everything else -- including
+// an unreadable first line -- selects the standard loader.
+std::auto_ptr<RuleTableLoader>
+RuleTableLoaderFactory::
+Create(const std::string &path)
+{
+  InputFileStream input(path);
+  std::string line;
+  if (!std::getline(input, line)) {
+    // empty phrase table
+    return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard());
+  }
+
+  std::vector<std::string> tokens;
+  Tokenize(tokens, line);
+  if (tokens.size() == 1) {
+    if (tokens[0] == "1") return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderCompact());
+    std::cerr << "Unsupported compact rule table format: " << tokens[0] << std::endl;
+    return std::auto_ptr<RuleTableLoader>();
+  }
+  // The size test keeps tokens[0]/tokens[1] in range when the line has no tokens at all.
+  if (tokens.size() >= 2 && tokens[0] == "[X]" && tokens[1] == "|||")
+    return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderHiero());
+  return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard());
+}
+
+}  // namespace Moses
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/LoaderHiero.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderHiero.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eb81d56772e07a4ba7737ed338c2346751245888
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderHiero.cpp
@@ -0,0 +1,33 @@
+//
+//  RuleTableLoaderHiero.cpp
+//  moses
+//
+//  Created by Hieu Hoang on 04/11/2011.
+//  Copyright 2011 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "LoaderHiero.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+// Loads a Hiero-format rule table by delegating to the standard loader with
+// the format fixed to HieroFormat; returns whatever that loader returns.
+bool RuleTableLoaderHiero::Load(AllOptions const& opts,
+                                const std::vector<FactorType> &input,
+                                const std::vector<FactorType> &output,
+                                const std::string &inFile,
+                                size_t tableLimit,
+                                RuleTableTrie &ruleTable)
+{
+  return RuleTableLoaderStandard::Load(opts, HieroFormat,
+                                       input, output,
+                                       inFile, tableLimit,
+                                       ruleTable);
+}
+
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/LoaderStandard.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderStandard.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c84286588830eec2eef9405e64727d6ed82c9be3
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/LoaderStandard.cpp
@@ -0,0 +1,260 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "LoaderStandard.h"
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <iostream>
+#include <sys/stat.h>
+#include <cstdlib>
+#include <boost/algorithm/string/predicate.hpp>
+#include "Trie.h"
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/Range.h"
+#include "moses/ChartTranslationOptionList.h"
+#include "moses/FactorCollection.h"
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+#include "util/double-conversion/double-conversion.h"
+#include "util/exception.hh"
+
+using namespace std;
+using namespace boost::algorithm;
+
+namespace Moses
+{
+
+// Convenience overload: forwards to the format-aware Load() with MosesFormat.
+bool RuleTableLoaderStandard::
+Load(AllOptions const& opts,
+     const std::vector<FactorType> &input,
+     const std::vector<FactorType> &output,
+     const std::string &inFile,
+     size_t tableLimit,
+     RuleTableTrie &ruleTable)
+{
+  return Load(opts, MosesFormat, input, output, inFile, tableLimit, ruleTable);
+}
+
+// Rewrites one side of a Hiero rule in place and records where its bracketed
+// tokens sit, for the alignment field.
+//   sourceTarget  0 => positions are stored in .first of each pair,
+//                 any other value => in .second
+//   phrase        space-separated tokens; every "[...]" token is rewritten and
+//                 " [X]" is appended to the whole phrase
+//   ntAlign       co-index (the part after ',') -> pair of token positions
+// Throws if a bracketed token does not split into exactly two parts at ','.
+void ReformatHieroRule(int sourceTarget, string &phrase, map<size_t, pair<size_t, size_t> > &ntAlign)
+{
+  vector<string> toks;
+  Tokenize(toks, phrase, " ");
+
+  const bool isSource = (sourceTarget == 0);
+  for (size_t pos = 0; pos < toks.size(); ++pos) {
+    string &tok = toks[pos];
+    if (!starts_with(tok, "[") || !ends_with(tok, "]")) continue;  // not bracketed: left unchanged
+
+    vector<string> split = Tokenize(tok, ",");
+    UTIL_THROW_IF2(split.size() != 2, "Incorrectly formatted non-terminal: " << tok);
+    tok = "[X]" + split[0] + "]";   // e.g. "[X,1]" becomes "[X][X]"
+
+    pair<size_t, size_t> &alignPoint = ntAlign[Scan<size_t>(split[1])];
+    (isSource ? alignPoint.first : alignPoint.second) = pos;
+  }
+
+  phrase = Join(" ", toks) + " [X]";
+}
+
+// Rewrites a Hiero score field in place: every space-separated "name=value" item is
+// replaced by exp(-value), and the results are joined with single spaces.
+// Throws if an item does not split into exactly two parts at '='.
+void ReformateHieroScore(string &scoreString)
+{
+  vector<string> toks;
+  Tokenize(toks, scoreString, " ");
+
+  for (vector<string>::iterator item = toks.begin(); item != toks.end(); ++item) {
+    string &tok = *item;
+    vector<string> nameValue = Tokenize(tok, "=");
+    UTIL_THROW_IF2(nameValue.size() != 2, "Incorrectly formatted score: " << tok);
+    const float score = exp(-Scan<float>(nameValue[1]));
+    tok = SPrint(score);
+  }
+
+  scoreString = Join(" ", toks);
+}
+
+// Converts one Hiero-format rule line into a Moses-format rule line.
+//   lineOrig  "|||"-separated fields; field 0 is ignored, fields 1, 2 and 3 are
+//             the source phrase, target phrase and scores
+//   out       receives "source ||| target ||| scores ||| alignments", where the
+//             alignments are "first-second " position pairs ordered by co-index
+// Throws if the line has fewer than four fields (previously this read past the
+// end of the token vector).
+void ReformatHieroRule(const string &lineOrig, string &out)
+{
+  vector<string> tokens;
+  TokenizeMultiCharSeparator(tokens, lineOrig, "|||" );
+  UTIL_THROW_IF2(tokens.size() < 4,
+                 "Hiero rule needs at least 4 '|||'-separated fields: " << lineOrig);
+
+  string &sourcePhraseString = tokens[1];
+  string &targetPhraseString = tokens[2];
+  string &scoreString        = tokens[3];
+
+  map<size_t, pair<size_t, size_t> > ntAlign;
+  ReformatHieroRule(0, sourcePhraseString, ntAlign);
+  ReformatHieroRule(1, targetPhraseString, ntAlign);
+  ReformateHieroScore(scoreString);
+
+  util::StringStream ret;
+  ret << sourcePhraseString << " ||| " << targetPhraseString << " ||| " << scoreString << " ||| ";
+  map<size_t, pair<size_t, size_t> >::const_iterator iterAlign;
+  for (iterAlign = ntAlign.begin(); iterAlign != ntAlign.end(); ++iterAlign) {
+    ret << iterAlign->second.first << "-" << iterAlign->second.second << " ";
+  }
+  out = ret.str();
+}
+
+bool RuleTableLoaderStandard::Load(AllOptions const& opts, FormatType format
+                                   , const std::vector<FactorType> &input
+                                   , const std::vector<FactorType> &output
+                                   , const std::string &inFile
+                                   , size_t /* tableLimit */
+                                   , RuleTableTrie &ruleTable)
+{
+  PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses":"Hiero") + " format");
+  // Reads inFile line by line and adds one TargetPhrase per line to ruleTable. A line is
+  // "source ||| target ||| scores [||| alignment [||| counts [||| sparse [||| properties]]]]";
+  // Hiero-format lines are rewritten into that layout first. The only return value is true.
+  string lineOrig; // NOTE(review): not referenced again in this function
+  size_t count = 0; // entries loaded so far; skipped entries do not advance it
+
+  std::ostream *progress = NULL;
+  IFVERBOSE(1) progress = &std::cerr;
+  util::FilePiece in(inFile.c_str(), progress);
+
+  // reused variables
+  vector<float> scoreVector;
+  StringPiece line;
+  std::string hiero_before, hiero_after;
+  // Empty or unparsable input converts to NaN (2nd and 3rd arguments), which the isnan() test below rejects.
+  double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+  while(true) {
+    try {
+      line = in.ReadLine();
+    } catch (const util::EndOfFileException &e) {
+      break; // end of file: the only regular way out of the loop
+    }
+
+    if (format == HieroFormat) { // inefficiently reformat line
+      hiero_before.assign(line.data(), line.size());
+      ReformatHieroRule(hiero_before, hiero_after);
+      line = hiero_after;
+    }
+    // Walk the "|||"-separated fields. NOTE(review): the first three are read without checking that they exist.
+    util::TokenIter<util::MultiCharacter> pipes(line, "|||");
+    StringPiece sourcePhraseString(*pipes);
+    StringPiece targetPhraseString(*++pipes);
+    StringPiece scoreString(*++pipes);
+
+    StringPiece alignString; // stays default-constructed when there is no fourth field
+    if (++pipes) {
+      StringPiece temp(*pipes);
+      alignString = temp;
+    }
+    // NOTE(review): the test inspects the source side although the message says "target" -- confirm which is intended.
+    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
+    if (isLHSEmpty && !opts.unk.word_deletion_enabled) {
+      TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
+      continue;
+    }
+    // Parse the scores; each value goes through TransformScore() and then FloorScore().
+    scoreVector.clear();
+    for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
+      int processed; // out-parameter of StringToFloat(); not inspected afterwards
+      float score = converter.StringToFloat(s->data(), s->length(), &processed);
+      UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << count);
+      scoreVector.push_back(FloorScore(TransformScore(score)));
+    }
+    const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
+    if (scoreVector.size() != numScoreComponents) {
+      UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
+                  << numScoreComponents << ") of score components on line " << count);
+    }
+
+    // parse source & find pt node
+
+    // constituent labels
+    Word *sourceLHS = NULL;
+    Word *targetLHS;
+
+    // create target phrase obj
+    TargetPhrase *targetPhrase = new TargetPhrase(&ruleTable);
+    targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
+    // source
+    Phrase sourcePhrase;
+    sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);
+
+    // rest of target phrase
+    targetPhrase->SetAlignmentInfo(alignString);
+    targetPhrase->SetTargetLHS(targetLHS);
+
+    ++pipes;  // skip over counts field
+    // The two fields after the counts are optional: sparse scores, then properties.
+    if (++pipes) {
+      StringPiece sparseString(*pipes);
+      targetPhrase->SetSparseScore(&ruleTable, sparseString);
+    }
+
+    if (++pipes) {
+      StringPiece propertiesString(*pipes);
+      targetPhrase->SetProperties(propertiesString);
+    }
+
+    targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
+    targetPhrase->EvaluateInIsolation(sourcePhrase, ruleTable.GetFeaturesToApply());
+
+    TargetPhraseCollection::shared_ptr phraseColl
+    = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase,
+                                        *targetPhrase, sourceLHS);
+    phraseColl->Add(targetPhrase);
+
+    // not implemented correctly in memory pt. just delete it for now
+    delete sourceLHS;
+
+    count++;
+  }
+
+  // sort and prune each target phrase collection
+  SortAndPrune(ruleTable);
+
+  return true;
+}
+
+}
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..50dd4bb8afa4aab8510c1d3ad8420376112079f5
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -0,0 +1,417 @@
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <climits>
+#include <sys/types.h>
+#include <unistd.h>
+#include <dirent.h>
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include "Loader.h"
+#include "LoaderFactory.h"
+#include "PhraseDictionaryFuzzyMatch.h"
+#include "moses/FactorCollection.h"
+#include "moses/Word.h"
+#include "moses/Util.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/Range.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h"
+#include "moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.h"
+#include "moses/TranslationModel/fuzzy-match/SentenceAlignment.h"
+#include "moses/TranslationTask.h"
+#include "util/file.hh"
+#include "util/exception.hh"
+#include "util/random.hh"
+
+using namespace std;
+
+#if defined __MINGW32__ && !defined mkdtemp
+#include <windows.h>
+#include <cerrno>
+// Minimal mkdtemp() stand-in for MinGW: creates <Windows temp dir><basename of tempbuf>
+// with the trailing "XXXXXX" replaced by a random number, and writes that path back
+// into tempbuf (so the buffer must be able to hold it). Returns NULL on a malformed
+// template (errno = EINVAL) or if the directory cannot be created.
+char *mkdtemp(char *tempbuf)
+{
+  char tempbasebuf[MAX_PATH] = "";
+  const size_t len = strlen(tempbuf);
+  if (len < 6 || strcmp(tempbuf + len - 6, "XXXXXX")) { // length first: len - 6 must not wrap
+    errno = EINVAL;
+    return NULL;
+  }
+  util::rand_init();
+  const int rand_value = util::rand_excl(1e6);
+  char *tempbase = strrchr(tempbuf, '/');
+  tempbase = tempbase ? tempbase+1 : tempbuf;
+  strcpy(tempbasebuf, tempbase);
+  sprintf(&tempbasebuf[strlen(tempbasebuf)-6], "%d", rand_value);
+  ::GetTempPath(MAX_PATH, tempbuf);
+  strcat(tempbuf, tempbasebuf);
+  return ::CreateDirectory(tempbuf, NULL) ? tempbuf : NULL;
+}
+#endif
+
+namespace Moses
+{
+
+PhraseDictionaryFuzzyMatch::PhraseDictionaryFuzzyMatch(const std::string &line)
+  :PhraseDictionary(line, true)
+  ,m_config(3) // one slot each for the "source", "target" and "alignment" parameters
+  ,m_FuzzyMatchWrapper(NULL) // created later, in Load()
+{
+  ReadParameters();
+}
+
+PhraseDictionaryFuzzyMatch::~PhraseDictionaryFuzzyMatch()
+{
+  delete m_FuzzyMatchWrapper; // allocated in Load(); still NULL (harmless to delete) if Load() never ran
+}
+
+void PhraseDictionaryFuzzyMatch::Load(AllOptions::ptr const& opts)
+{
+  m_options = opts;
+  SetFeaturesToApply();
+  // m_config[0..2] are the values SetParameter stored for "source", "target" and "alignment".
+  m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]);
+}
+
+ChartRuleLookupManager *PhraseDictionaryFuzzyMatch::CreateRuleLookupManager(
+  const ChartParser &parser,
+  const ChartCellCollectionBase &cellCollection,
+  std::size_t /*maxChartSpan*/) // the third argument is ignored
+{
+  return new ChartRuleLookupManagerMemoryPerSentence(parser, cellCollection, *this); // heap-allocated; the raw pointer goes to the caller
+}
+
+// Routes "source", "target" and "alignment" into m_config[0], [1] and [2];
+// every other key is passed on to PhraseDictionary::SetParameter.
+void
+PhraseDictionaryFuzzyMatch::
+SetParameter(const std::string& key, const std::string& value)
+{
+  static const char *const kSlotKeys[] = { "source", "target", "alignment" };
+  for (size_t slot = 0; slot < 3; ++slot) {
+    if (key != kSlotKeys[slot]) continue;
+    m_config[slot] = value;
+    return;
+  }
+  PhraseDictionary::SetParameter(key, value);
+}
+
+// Deletes the directory `dirname` and everything beneath it.
+//
+// Every entry other than "." and ".." is passed to remove(); entries whose d_type is
+// DT_DIR are emptied first through a recursive call. Finally rmdir() is called on
+// `dirname` itself. The results of remove(), rmdir() and the recursive calls are
+// not checked.
+//
+// Returns 0 if `dirname` cannot be opened (after reporting through perror()) and 1
+// in every other case. Under __MINGW32__ nothing is deleted and 1 is returned.
+//
+// NOTE(review): entries whose d_type is not DT_DIR are never recursed into,
+// and a child path that does not fit into PATH_MAX bytes is truncated by
+// snprintf().
+int removedirectoryrecursively(const char *dirname)
+{
+#if defined __MINGW32__
+  //TODO(jie): replace this function with boost implementation
+#else
+  // A directory that cannot be opened is the only failure reported to the caller.
+  DIR *handle = opendir(dirname);
+  if (handle == NULL) {
+    perror("Error opendir()");
+    return 0;
+  }
+
+  // Scratch buffer reused for every child path.
+  char child[PATH_MAX];
+  for (struct dirent *item = readdir(handle); item != NULL; item = readdir(handle)) {
+    const bool isSelf = (strcmp(item->d_name, ".") == 0);
+    const bool isParent = (strcmp(item->d_name, "..") == 0);
+    if (isSelf || isParent) {
+      continue;
+    }
+
+    // Build "<dirname>/<entry>" for both the recursive call and remove().
+    snprintf(child, (size_t) PATH_MAX, "%s/%s", dirname, item->d_name);
+    if (item->d_type == DT_DIR) {
+      // Empty the subdirectory first so that remove() below can delete it.
+      removedirectoryrecursively(child);
+    }
+    remove(child);
+  }
+  closedir(handle);
+
+  // All entries have been processed; now delete the directory itself.
+  rmdir(dirname);
+#endif
+  return 1;
+}
+
+// Builds the per-sentence rule trie for the input carried by ttask:
+//   1. writes the input words, except the first and the last one, to "<tmpdir>/in";
+//   2. lets m_FuzzyMatchWrapper->Extract() produce a phrase table file for that input;
+//   3. loads each "source ||| target ||| scores ||| alignment [||| ...]" line of that
+//      file into the trie stored at m_collection[translation id], then sorts/prunes it.
+// Throws if the temporary directory cannot be created, if a line does not have 4 or 5
+// fields, or if its number of scores differs from GetNumScoreComponents().
+void PhraseDictionaryFuzzyMatch::InitializeForInput(ttasksptr const& ttask)
+{
+  InputType const& inputSentence = *ttask->GetSource();
+#if defined __MINGW32__
+  // The mkdtemp() replacement earlier in this file writes the whole temp-directory path
+  // back into this buffer, so it must be far larger than the template string itself.
+  char dirName[1024] = "moses.XXXXXX";
+#else
+  char dirName[] = "/tmp/moses.XXXXXX";
+#endif // defined
+  char *temp = mkdtemp(dirName);
+  UTIL_THROW_IF2(temp == NULL,
+                 "Couldn't create temporary directory " << dirName);
+
+  string dirNameStr(dirName);
+
+  string inFileName(dirNameStr + "/in");
+
+  ofstream inFile(inFileName.c_str());
+
+  // "i + 1 < size" rather than "i < size - 1": for an empty input the subtraction
+  // would wrap around in the unsigned comparison and the loop would run away.
+  for (size_t i = 1; i + 1 < inputSentence.GetSize(); ++i) {
+    inFile << inputSentence.GetWord(i);
+  }
+  inFile << endl;
+  inFile.close();
+
+  long translationId = inputSentence.GetTranslationId();
+  string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);
+
+  // populate with rules for this sentence
+  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
+  FormatType format = MosesFormat;
+
+  // data from file
+  InputFileStream inStream(ptFileName);
+
+  // copied from class LoaderStandard
+  PrintUserTime("Start loading fuzzy-match phrase model");
+
+  string lineOrig;
+  size_t count = 0; // entries loaded so far; skipped entries do not advance it
+
+  while(getline(inStream, lineOrig)) {
+    if (format == HieroFormat) {
+      UTIL_THROW(util::Exception, "Cannot be Hiero format");
+    }
+
+    vector<string> tokens;
+    vector<float> scoreVector;
+
+    TokenizeMultiCharSeparator(tokens, lineOrig , "|||" );
+
+    if (tokens.size() != 4 && tokens.size() != 5) {
+      UTIL_THROW2("Syntax error at " << ptFileName << ":" << count);
+    }
+
+    const string &sourcePhraseString = tokens[0];
+    const string &targetPhraseString = tokens[1];
+    const string &scoreString        = tokens[2];
+    const string &alignString        = tokens[3];
+
+    // NOTE(review): the test inspects the source side although the message says
+    // "target" -- confirm which of the two is intended.
+    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
+    if (isLHSEmpty && !ttask->options()->unk.word_deletion_enabled) {
+      TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
+      continue;
+    }
+
+    Tokenize<float>(scoreVector, scoreString);
+    const size_t numScoreComponents = GetNumScoreComponents();
+    if (scoreVector.size() != numScoreComponents) {
+      UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
+                  << numScoreComponents << ") of score components on line " << count);
+    }
+
+    // parse source & find pt node
+
+    // constituent labels
+    Word *sourceLHS = NULL;
+    Word *targetLHS;
+
+    // source
+    Phrase sourcePhrase( 0);
+    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, &sourceLHS);
+
+    // create target phrase obj
+    TargetPhrase *targetPhrase = new TargetPhrase(this);
+    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, &targetLHS);
+
+    // rest of target phrase
+    targetPhrase->SetAlignmentInfo(alignString);
+    targetPhrase->SetTargetLHS(targetLHS);
+    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
+
+    // component score, for n-best output
+    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
+    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
+
+    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
+    targetPhrase->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
+
+    TargetPhraseCollection::shared_ptr phraseColl
+    = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase,
+                                        *targetPhrase, sourceLHS);
+    phraseColl->Add(targetPhrase);
+
+    // The trie lookup above does not keep sourceLHS, so release it here (this is
+    // what RuleTableLoaderStandard::Load does with its source LHS as well).
+    delete sourceLHS;
+
+    count++;
+  }
+
+  // sort and prune each target phrase collection
+  SortAndPrune(rootNode);
+
+  // NOTE(review): the temporary directory is left on disk (removal is disabled below).
+  //removedirectoryrecursively(dirName);
+}
+
+// Returns the target phrase collection of the trie node for `source` (created on demand).
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryFuzzyMatch::
+GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeMemory &rootNode,
+                                  const Phrase &source,
+                                  const TargetPhrase &target,
+                                  const Word *sourceLHS)
+{
+  return GetOrCreateNode(rootNode, source, target, sourceLHS).GetTargetPhraseCollection();
+}
+
+// Follows the trie path that spells out `source` below rootNode, creating missing
+// nodes on the way, and returns the node reached after the last source word.
+//
+// Terminals select a child by the word itself. Non-terminals consume the entries of
+// target.GetAlignNonTerm() in order: the next entry must start at the current source
+// position, and its second component is the position of the aligned target word.
+// Throws if an alignment entry is missing or mismatched, or if a child is NULL.
+// sourceLHS is accepted but not used when building the path.
+PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeMemory &rootNode
+    , const Phrase &source
+    , const TargetPhrase &target
+    , const Word *sourceLHS)
+{
+  // NOTE(review): prints every source/target pair to stderr unconditionally; this
+  // looks like leftover debugging output -- confirm before removing.
+  cerr << source << endl << target << endl;
+
+  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
+  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
+
+  PhraseDictionaryNodeMemory *currNode = &rootNode;
+  const size_t size = source.GetSize();
+  for (size_t pos = 0 ; pos != size ; ++pos) {
+    const Word& word = source.GetWord(pos);
+
+    if (!word.IsNonTerminal()) {
+      currNode = currNode->GetOrCreateChild(word);
+    } else {
+      UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
+                     "No alignment for non-term at position " << pos);
+      UTIL_THROW_IF2(iterAlign->first != pos,
+                     "Alignment info incorrect at position " << pos);
+
+      const Word &targetNonTerm = target.GetWord(iterAlign->second);
+      ++iterAlign;
+#if defined(UNLABELLED_SOURCE)
+      currNode = currNode->GetOrCreateNonTerminalChild(targetNonTerm);
+#else
+      currNode = currNode->GetOrCreateChild(word, targetNonTerm);
+#endif
+    }
+    UTIL_THROW_IF2(currNode == NULL,
+                   "Node not found at position " << pos);
+  }
+
+  return *currNode;
+}
+
+void PhraseDictionaryFuzzyMatch::SortAndPrune(PhraseDictionaryNodeMemory &rootNode)
+{
+  if (GetTableLimit()) { // a table limit of 0 leaves the trie untouched
+    rootNode.Sort(GetTableLimit()); // the limit is handed to Sort()
+  }
+}
+
+void PhraseDictionaryFuzzyMatch::CleanUpAfterSentenceProcessing(const InputType &source)
+{
+  m_collection.erase(source.GetTranslationId()); // drop the trie that InitializeForInput stored under this translation id
+}
+
+const PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(long translationId) const
+{ // read-only lookup of the trie stored for translationId; throws if there is none
+  std::map<long, PhraseDictionaryNodeMemory>::const_iterator iter = m_collection.find(translationId);
+  UTIL_THROW_IF2(iter == m_collection.end(),
+                 "Couldn't find root node for input: " << translationId);
+  return iter->second;
+}
+PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source)
+{ // mutable lookup keyed by the input's translation id; throws if no trie is stored for it
+  long transId = source.GetTranslationId();
+  std::map<long, PhraseDictionaryNodeMemory>::iterator iter = m_collection.find(transId);
+  UTIL_THROW_IF2(iter == m_collection.end(),
+                 "Couldn't find root node for input: " << transId);
+  return iter->second;
+}
+
+TO_STRING_BODY(PhraseDictionaryFuzzyMatch);
+
+// friend
+ostream& operator<<(ostream& out, const PhraseDictionaryFuzzyMatch& phraseDict)
+{
+  /* Currently writes nothing to `out`: the traversal below is commented out.
+  typedef PhraseDictionaryNodeMemory::TerminalMap TermMap;
+  typedef PhraseDictionaryNodeMemory::NonTerminalMap NonTermMap;
+
+  const PhraseDictionaryNodeMemory &coll = phraseDict.m_collection;
+  for (NonTermMap::const_iterator p = coll.m_nonTermMap.begin(); p != coll.m_nonTermMap.end(); ++p) {
+    const Word &sourceNonTerm = p->first.first;
+    out << sourceNonTerm;
+  }
+  for (TermMap::const_iterator p = coll.m_sourceTermMap.begin(); p != coll.m_sourceTermMap.end(); ++p) {
+    const Word &sourceTerm = p->first;
+    out << sourceTerm;
+  }
+   */
+
+  return out;
+}
+
+}
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7766c897a898f04bc69662ac73910a7325be83e1
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp
@@ -0,0 +1,398 @@
+// vim:tabstop=2
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2010 Hieu Hoang
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include "PhraseDictionaryOnDisk.h"
+#include "moses/InputFileStream.h"
+#include "moses/StaticData.h"
+#include "moses/TargetPhraseCollection.h"
+#include "moses/InputPath.h"
+#include "moses/TranslationModel/CYKPlusParser/DotChartOnDisk.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerOnDisk.h"
+#include "moses/TranslationTask.h"
+
+#include "OnDiskPt/OnDiskWrapper.h"
+#include "OnDiskPt/Word.h"
+
+#include "util/tokenize_piece.hh"
+
+using namespace std;
+
+
+namespace Moses
+{
+/** Construct the on-disk phrase dictionary from its configuration \p line.
+ *  Both max-span members start as NOT_FOUND; SetParameter() in this file
+ *  overwrites them for the "max-span-default" / "max-span-labelled" keys.
+ *  ReadParameters() is defined elsewhere -- presumably it parses \p line and
+ *  dispatches to SetParameter(); TODO confirm.
+ */
+PhraseDictionaryOnDisk::PhraseDictionaryOnDisk(const std::string &line)
+  : MyBase(line, true)
+  , m_maxSpanDefault(NOT_FOUND)
+  , m_maxSpanLabelled(NOT_FOUND)
+{
+  ReadParameters();
+}
+
+/** Destructor; the body is empty, so no explicit clean-up is performed here. */
+PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
+{
+}
+
+/** Record the decoder options and call SetFeaturesToApply().
+ *  No table data is opened here: the OnDiskWrapper is created and loaded in
+ *  InitializeForInput().
+ *  @param opts options object, stored in m_options
+ */
+void PhraseDictionaryOnDisk::Load(AllOptions::ptr const& opts)
+{
+  m_options = opts;
+  SetFeaturesToApply();
+}
+
+/** Create a chart rule lookup manager backed by the current on-disk wrapper.
+ *  The max-chart-span argument is ignored by this implementation.
+ *  Throws (through GetImplementation()) when no wrapper has been created yet.
+ *  @return a newly allocated ChartRuleLookupManagerOnDisk
+ */
+ChartRuleLookupManager *PhraseDictionaryOnDisk::CreateRuleLookupManager(
+  const ChartParser &parser,
+  const ChartCellCollectionBase &cellCollection,
+  std::size_t /*maxChartSpan*/)
+{
+  OnDiskPt::OnDiskWrapper &dbWrapper = GetImplementation();
+
+  ChartRuleLookupManager *manager =
+    new ChartRuleLookupManagerOnDisk(parser, cellCollection, *this,
+                                     dbWrapper, m_input, m_output);
+  return manager;
+}
+
+/** Return the OnDiskWrapper currently held by m_implementation.
+ *  Throws (via UTIL_THROW_IF2) when m_implementation holds no object, i.e.
+ *  before InitializeForInput() has installed one.
+ */
+OnDiskPt::OnDiskWrapper &PhraseDictionaryOnDisk::GetImplementation()
+{
+  OnDiskPt::OnDiskWrapper *dict = m_implementation.get();
+  UTIL_THROW_IF2(dict == NULL, "Dictionary object not yet created for this thread");
+
+  return *dict;
+}
+
+/** Const overload: return the OnDiskWrapper currently held by
+ *  m_implementation as a const reference.
+ *  Throws (via UTIL_THROW_IF2) when m_implementation holds no object.
+ */
+const OnDiskPt::OnDiskWrapper &PhraseDictionaryOnDisk::GetImplementation() const
+{
+  OnDiskPt::OnDiskWrapper *dict = m_implementation.get();
+  UTIL_THROW_IF2(dict == NULL, "Dictionary object not yet created for this thread");
+
+  return *dict;
+}
+
+/** Open a fresh OnDiskWrapper for the upcoming input and install it in
+ *  m_implementation.
+ *
+ *  Steps: trim the cache (ReduceCache), open the table at m_filePath, then
+ *  check the table's recorded version, source/target factor counts and score
+ *  count against this feature's configuration.
+ *
+ *  The translation task argument is not needed by this implementation.
+ *
+ *  Throws (via UTIL_THROW_IF2) when any of the checks fails. The wrapper is
+ *  only handed to m_implementation after all checks pass; if anything throws
+ *  before that point the wrapper is deleted, so a rejected table no longer
+ *  leaks it and the previously installed wrapper is left untouched.
+ */
+void PhraseDictionaryOnDisk::InitializeForInput(ttasksptr const& /*ttask*/)
+{
+  ReduceCache();
+
+  OnDiskPt::OnDiskWrapper *obj = new OnDiskPt::OnDiskWrapper();
+  try {
+    obj->BeginLoad(m_filePath);
+
+    UTIL_THROW_IF2(obj->GetMisc("Version") != OnDiskPt::OnDiskWrapper::VERSION_NUM,
+                   "On-disk phrase table is version " <<  obj->GetMisc("Version")
+                   << ". It is not compatible with version " << OnDiskPt::OnDiskWrapper::VERSION_NUM);
+
+    UTIL_THROW_IF2(obj->GetMisc("NumSourceFactors") != m_input.size(),
+                   "On-disk phrase table has " <<  obj->GetMisc("NumSourceFactors") << " source factors."
+                   << ". The ini file specified " << m_input.size() << " source factors");
+
+    UTIL_THROW_IF2(obj->GetMisc("NumTargetFactors") != m_output.size(),
+                   "On-disk phrase table has " <<  obj->GetMisc("NumTargetFactors") << " target factors."
+                   << ". The ini file specified " << m_output.size() << " target factors");
+
+    UTIL_THROW_IF2(obj->GetMisc("NumScores") != m_numScoreComponents,
+                   "On-disk phrase table has " <<  obj->GetMisc("NumScores") << " scores."
+                   << ". The ini file specified " << m_numScoreComponents << " scores");
+  } catch (...) {
+    // Ownership has not been transferred yet: release the wrapper and let the
+    // original exception propagate unchanged.
+    delete obj;
+    throw;
+  }
+
+  m_implementation.reset(obj);
+}
+
+/** Resolve target phrases for every input path in \p inputPathQueue, then
+ *  free the phrase-table nodes that were stored on the paths while doing so.
+ *
+ *  Deletion is a separate second pass because the per-path lookup reads the
+ *  node stored on the path's predecessor, so nodes must stay alive until all
+ *  paths have been processed.
+ */
+void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+{
+  // Pass 1: look up each path in queue order.
+  for (InputPathList::const_iterator it = inputPathQueue.begin();
+       it != inputPathQueue.end(); ++it) {
+    GetTargetPhraseCollectionBatch(**it);
+  }
+
+  // Pass 2: release the nodes that pass 1 saved on the paths.
+  for (InputPathList::const_iterator it = inputPathQueue.begin();
+       it != inputPathQueue.end(); ++it) {
+    InputPath &path = **it;
+    delete static_cast<const OnDiskPt::PhraseNode*>(path.GetPtNode(*this));
+  }
+}
+
+/** Look up the target phrases for a single input path and store the result
+ *  (collection plus phrase-table node) on the path.
+ *
+ *  The lookup extends the node saved on the previous path by the last word of
+ *  this path's phrase; a path without a predecessor starts from the table's
+ *  root source node. Nothing is stored when the backoff condition fails or
+ *  when the predecessor has no node. When the last word is unknown to this
+ *  table, an empty collection and a NULL node are stored.
+ */
+void PhraseDictionaryOnDisk::GetTargetPhraseCollectionBatch(InputPath &inputPath) const
+{
+  OnDiskPt::OnDiskWrapper &wrapper = const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
+  const Phrase &phrase = inputPath.GetPhrase();
+
+  // Pick the node to extend from.
+  const OnDiskPt::PhraseNode *prevPtNode = NULL;
+  if (const InputPath *prevInputPath = inputPath.GetPrevPath()) {
+    prevPtNode = static_cast<const OnDiskPt::PhraseNode*>(prevInputPath->GetPtNode(*this));
+  } else {
+    // No predecessor: this must be a one-word phrase starting at the root.
+    assert(phrase.GetSize() == 1);
+    prevPtNode = &wrapper.GetRootSourceNode();
+  }
+
+  // Backoff is checked first; after that a missing predecessor node means
+  // there is nothing to extend.
+  if (!SatisfyBackoff(inputPath) || prevPtNode == NULL) {
+    return;
+  }
+
+  Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
+  lastWord.OnlyTheseFactors(m_inputFactors);
+  OnDiskPt::Word *lastWordOnDisk = ConvertFromMoses(wrapper, m_input, lastWord);
+
+  TargetPhraseCollection::shared_ptr tpc;
+  if (lastWordOnDisk == NULL) {
+    // Word is out-of-vocabulary for this table, so the path cannot be extended.
+    inputPath.SetTargetPhrases(*this, tpc, NULL);
+    return;
+  }
+
+  const OnDiskPt::PhraseNode *ptNode = prevPtNode->GetChild(*lastWordOnDisk, wrapper);
+  if (ptNode) {
+    tpc = GetTargetPhraseCollection(ptNode);
+  }
+  inputPath.SetTargetPhrases(*this, tpc, ptNode);
+
+  delete lastWordOnDisk;
+}
+
+/** Return the target phrases for \p ptNode, going through the cache.
+ *
+ *  The cache key is the node's file position converted to size_t. On a hit
+ *  the entry's timestamp is refreshed with clock(); on a miss the collection
+ *  is built by GetTargetPhraseCollectionNonCache() and stored together with
+ *  the current clock() value.
+ */
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryOnDisk::
+GetTargetPhraseCollection(const OnDiskPt::PhraseNode *ptNode) const
+{
+  CacheColl &cache = GetCache();
+  const size_t hash = (size_t) ptNode->GetFilePos();
+
+  CacheColl::iterator iter = cache.find(hash);
+  if (iter != cache.end()) {
+    // Cache hit: mark the entry as recently used and hand it back.
+    iter->second.second = clock();
+    return iter->second.first;
+  }
+
+  // Cache miss: read from the phrase table and remember the result.
+  TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollectionNonCache(ptNode);
+  cache[hash] = std::pair<TargetPhraseCollection::shared_ptr , clock_t>(ret, clock());
+
+  return ret;
+}
+
+/** Read the target phrases stored at \p ptNode from the on-disk table and
+ *  convert them to Moses objects, bypassing the cache.
+ *  At most m_tableLimit phrases are requested from the node; the conversion
+ *  is done with isSyntax = false.
+ */
+TargetPhraseCollection::shared_ptr
+PhraseDictionaryOnDisk::
+GetTargetPhraseCollectionNonCache(const OnDiskPt::PhraseNode *ptNode) const
+{
+  OnDiskPt::OnDiskWrapper &diskWrapper =
+    const_cast<OnDiskPt::OnDiskWrapper&>(GetImplementation());
+
+  vector<float> featureWeights = StaticData::Instance().GetWeights(this);
+  OnDiskPt::Vocab &diskVocab = diskWrapper.GetVocab();
+
+  OnDiskPt::TargetPhraseCollection::shared_ptr onDiskPhrases =
+    ptNode->GetTargetPhraseCollection(m_tableLimit, diskWrapper);
+
+  return ConvertToMoses(onDiskPhrases, m_input, m_output, *this,
+                        featureWeights, diskVocab, false);
+}
+
+/** Convert a whole on-disk target phrase collection into a Moses
+ *  TargetPhraseCollection.
+ *
+ *  Every on-disk phrase is converted with the single-phrase overload and
+ *  added to the result, which is then sorted with
+ *  Sort(true, phraseDict.GetTableLimit()).
+ *
+ *  @return a newly created collection holding the converted phrases
+ */
+Moses::TargetPhraseCollection::shared_ptr
+PhraseDictionaryOnDisk::ConvertToMoses(
+  const OnDiskPt::TargetPhraseCollection::shared_ptr targetPhrasesOnDisk
+  , const std::vector<Moses::FactorType> &inputFactors
+  , const std::vector<Moses::FactorType> &outputFactors
+  , const Moses::PhraseDictionary &phraseDict
+  , const std::vector<float> &weightT
+  , OnDiskPt::Vocab &vocab
+  , bool isSyntax) const
+{
+  Moses::TargetPhraseCollection::shared_ptr ret(new Moses::TargetPhraseCollection);
+
+  const size_t numPhrases = targetPhrasesOnDisk->GetSize();
+  for (size_t idx = 0; idx < numPhrases; ++idx) {
+    const OnDiskPt::TargetPhrase &diskPhrase = targetPhrasesOnDisk->GetTargetPhrase(idx);
+    Moses::TargetPhrase *converted =
+      ConvertToMoses(diskPhrase, inputFactors, outputFactors, vocab,
+                     phraseDict, weightT, isSyntax);
+    ret->Add(converted);
+  }
+
+  ret->Sort(true, phraseDict.GetTableLimit());
+
+  return ret;
+}
+
+/** Convert one on-disk target phrase into a newly allocated
+ *  Moses::TargetPhrase.
+ *
+ *  @param targetPhraseOnDisk phrase to convert; must contain at least one word
+ *  @param inputFactors  factor types used when converting the source phrase
+ *  @param outputFactors factor types used when converting target words
+ *  @param vocab         on-disk vocabulary used to resolve vocab ids
+ *  @param phraseDict    dictionary the new phrase is attributed to
+ *  @param weightT       not used in this function body
+ *  @param isSyntax      when true, the last on-disk word is treated as the
+ *                       target LHS instead of a regular word
+ *  @return heap-allocated phrase (the collection overload passes it to
+ *          TargetPhraseCollection::Add())
+ *
+ *  Throws (via UTIL_THROW_IF2) when the on-disk phrase is empty. That check
+ *  now runs before the TargetPhrase is allocated, so the throw cannot leak it.
+ */
+Moses::TargetPhrase *PhraseDictionaryOnDisk::ConvertToMoses(const OnDiskPt::TargetPhrase &targetPhraseOnDisk
+    , const std::vector<Moses::FactorType> &inputFactors
+    , const std::vector<Moses::FactorType> &outputFactors
+    , const OnDiskPt::Vocab &vocab
+    , const Moses::PhraseDictionary &phraseDict
+    , const std::vector<float> &weightT
+    , bool isSyntax) const
+{
+  // Validate first: nothing has been allocated yet if this throws.
+  size_t phraseSize = targetPhraseOnDisk.GetSize();
+  UTIL_THROW_IF2(phraseSize == 0, "Target phrase cannot be empty"); // last word is lhs
+  if (isSyntax) {
+    --phraseSize;
+  }
+
+  Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);
+
+  // words
+  for (size_t pos = 0; pos < phraseSize; ++pos) {
+    const OnDiskPt::Word &wordOnDisk = targetPhraseOnDisk.GetWord(pos);
+    ConvertToMoses(wordOnDisk, outputFactors, vocab, ret->AddWord());
+  }
+
+  // alignments: split into terminal and non-terminal alignments depending on
+  // the kind of the aligned target word
+  Moses::AlignmentInfo::CollType alignTerm, alignNonTerm;
+  for (size_t ind = 0; ind < targetPhraseOnDisk.GetAlign().size(); ++ind) {
+    const std::pair<size_t, size_t> &entry = targetPhraseOnDisk.GetAlign()[ind];
+    size_t sourcePos = entry.first;
+    size_t targetPos = entry.second;
+
+    if (targetPhraseOnDisk.GetWord(targetPos).IsNonTerminal()) {
+      alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
+    } else {
+      alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
+    }
+  }
+  ret->SetAlignTerm(alignTerm);
+  ret->SetAlignNonTerm(alignNonTerm);
+
+  if (isSyntax) {
+    Moses::Word *lhsTarget = new Moses::Word(true);
+    const OnDiskPt::Word &lhsOnDisk = targetPhraseOnDisk.GetWord(targetPhraseOnDisk.GetSize() - 1);
+    ConvertToMoses(lhsOnDisk, outputFactors, vocab, *lhsTarget);
+    ret->SetTargetLHS(lhsTarget);
+  }
+
+  // set source phrase
+  const OnDiskPt::PhrasePtr sp = targetPhraseOnDisk.GetSourcePhrase();
+  Moses::Phrase mosesSP(Moses::Input);
+  for (size_t pos = 0; pos < sp->GetSize(); ++pos) {
+    ConvertToMoses(sp->GetWord(pos), inputFactors, vocab, mosesSP.AddWord());
+  }
+
+  // scores
+  ret->GetScoreBreakdown().Assign(&phraseDict, targetPhraseOnDisk.GetScores());
+
+  // sparse features
+  ret->GetScoreBreakdown().Assign(&phraseDict, targetPhraseOnDisk.GetSparseFeatures());
+
+  // property
+  ret->SetProperties(targetPhraseOnDisk.GetProperty());
+
+  ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply());
+
+  return ret;
+}
+
+/** Convert one on-disk word into a Moses word, written into \p overwrite.
+ *
+ *  A non-terminal's vocabulary string becomes factor 0. For a terminal the
+ *  vocabulary string is split on '|' and the pieces are assigned, in order,
+ *  to the factor types listed in \p outputFactorsVec.
+ *
+ *  Throws (via UTIL_THROW_IF2) when a terminal's string has fewer or more
+ *  '|'-separated pieces than \p outputFactorsVec has entries.
+ */
+void PhraseDictionaryOnDisk::ConvertToMoses(
+  const OnDiskPt::Word &wordOnDisk,
+  const std::vector<Moses::FactorType> &outputFactorsVec,
+  const OnDiskPt::Vocab &vocab,
+  Moses::Word &overwrite) const
+{
+  Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
+  const bool isNonTerm = wordOnDisk.IsNonTerminal();
+  overwrite = Moses::Word(isNonTerm);
+
+  if (isNonTerm) {
+    const std::string &label = vocab.GetString(wordOnDisk.GetVocabId());
+    overwrite.SetFactor(0, factorColl.AddFactor(label, isNonTerm));
+    return;
+  }
+
+  // TODO: this conversion should have been done at load time.
+  util::TokenIter<util::SingleCharacter> tok(vocab.GetString(wordOnDisk.GetVocabId()), '|');
+
+  std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin();
+  while (t != outputFactorsVec.end()) {
+    UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(wordOnDisk.GetVocabId()) << "\"; was expecting " << outputFactorsVec.size());
+    overwrite.SetFactor(*t, factorColl.AddFactor(*tok, isNonTerm));
+    ++t;
+    ++tok;
+  }
+  UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(wordOnDisk.GetVocabId()) << "\"; was expecting " << outputFactorsVec.size());
+}
+
+/** Convert a Moses word into a newly allocated on-disk word.
+ *
+ *  The word's factors, taken in the order given by \p factorsVec, are joined
+ *  with '|' and the resulting string is looked up in the wrapper's vocabulary.
+ *  The first factor is mandatory; the string stops at the first later factor
+ *  that the word does not carry.
+ *
+ *  @param wrapper    on-disk table whose vocabulary is consulted
+ *  @param factorsVec factor types to read from \p origWord (must not be empty)
+ *  @param origWord   word to convert
+ *  @return a new OnDiskPt::Word owned by the caller, or NULL when the factor
+ *          string is not in the vocabulary (so no phrase containing it can be
+ *          in the table)
+ *
+ *  Throws (via UTIL_THROW_IF2) when \p factorsVec is empty or the first
+ *  factor is missing. The result is allocated only after every check and the
+ *  vocabulary lookup have succeeded, so a throw cannot leak it.
+ */
+OnDiskPt::Word *PhraseDictionaryOnDisk::ConvertFromMoses(OnDiskPt::OnDiskWrapper &wrapper, const std::vector<Moses::FactorType> &factorsVec
+    , const Moses::Word &origWord) const
+{
+  UTIL_THROW_IF2(factorsVec.empty(), "Expecting at least one factor type");
+
+  util::StringStream strme;
+
+  // The first factor must be present.
+  size_t factorType = factorsVec[0];
+  const Moses::Factor *factor = origWord.GetFactor(factorType);
+  UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
+  strme << factor->GetString();
+
+  // Remaining factors are optional: a word can carry fewer factors than
+  // factorsVec lists, in which case the string ends at the first gap.
+  for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
+    const Moses::Factor *extraFactor = origWord.GetFactor(factorsVec[ind]);
+    if (extraFactor == NULL) {
+      break;
+    }
+    strme << "|" << extraFactor->GetString();
+  }
+
+  bool found = false;
+  const uint64_t vocabId = wrapper.GetVocab().GetVocabId(strme.str(), found);
+  if (!found) {
+    // factor not in phrase table -> phrase definitely not in it either
+    return NULL;
+  }
+
+  OnDiskPt::Word *newWord = new OnDiskPt::Word(origWord.IsNonTerminal());
+  newWord->SetVocabId(vocabId);
+  return newWord;
+}
+
+/** Apply one key/value configuration setting.
+ *  "max-span-default" and "max-span-labelled" are parsed as size_t and stored
+ *  in the corresponding members; every other key is forwarded to
+ *  PhraseDictionary::SetParameter().
+ */
+void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "max-span-default") {
+    m_maxSpanDefault = Scan<size_t>(value);
+    return;
+  }
+
+  if (key == "max-span-labelled") {
+    m_maxSpanLabelled = Scan<size_t>(value);
+    return;
+  }
+
+  // Not one of ours: let the base class deal with it.
+  PhraseDictionary::SetParameter(key, value);
+}
+
+
+} // namespace
+
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/Trie.h b/mosesdecoder/moses/TranslationModel/RuleTable/Trie.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a9e12e8dd88c37af2a1793c960ba1b4717de5d2
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/Trie.h
@@ -0,0 +1,63 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/TypeDef.h"
+
+#include <string>
+#include <vector>
+
+namespace Moses
+{
+
+class Phrase;
+class TargetPhrase;
+class TargetPhraseCollection;
+class Word;
+
+/*** Implementation of a SCFG rule table in a trie.  Looking up a rule of
+ * length n symbols requires n look-ups to find the TargetPhraseCollection.
+ * The class is abstract: GetOrCreateTargetPhraseCollection() and
+ * SortAndPrune() are pure virtual and must be supplied by subclasses.
+ * @todo why need this and PhraseDictionaryMemory?
+ */
+class RuleTableTrie : public PhraseDictionary
+{
+public:
+  // Forwards the configuration line to PhraseDictionary; the second
+  // constructor argument is fixed to true (its meaning is defined by
+  // PhraseDictionary, which is not visible here).
+  RuleTableTrie(const std::string &line)
+    : PhraseDictionary(line, true) {
+  }
+
+  // Declared here, defined out of line.
+  virtual ~RuleTableTrie();
+
+  // Declared here, defined out of line.
+  void Load(AllOptions::ptr const& opts);
+
+private:
+  // RuleTableLoader gets access to the private hooks below (presumably so
+  // that loaders can populate the trie -- confirm against the loader code).
+  friend class RuleTableLoader;
+
+  // Subclass hook: return the target phrase collection associated with the
+  // given source phrase / target phrase / source LHS, creating it if needed
+  // (as the name says; the behaviour is defined by the subclass).
+  virtual TargetPhraseCollection::shared_ptr
+  GetOrCreateTargetPhraseCollection(const Phrase &source,
+                                    const TargetPhrase &target,
+                                    const Word *sourceLHS) = 0;
+
+  // Subclass hook, pure virtual.
+  virtual void SortAndPrune() = 0;
+
+};
+
+}  // namespace Moses
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.cpp b/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eee88a11b2036a46b52f422e6ae9244accae8fe7
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.cpp
@@ -0,0 +1,96 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ ***********************************************************************/
+
+#include "moses/NonTerminal.h"
+#include "moses/TranslationModel/Scope3Parser/Parser.h"
+#include "moses/StaticData.h"
+#include "moses/TargetPhrase.h"
+#include "moses/TargetPhraseCollection.h"
+#include "moses/Util.h"
+#include "moses/Word.h"
+#include "UTrie.h"
+#include "Trie.h"
+#include "UTrieNode.h"
+
+#include <boost/functional/hash.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/version.hpp>
+
+#include <map>
+#include <vector>
+
+namespace Moses
+{
+
+/** Find (or create) the trie node for \p source and return that node's
+ *  target phrase collection for \p target, creating it if necessary.
+ */
+TargetPhraseCollection::shared_ptr
+RuleTableUTrie::
+GetOrCreateTargetPhraseCollection(const Phrase &source,
+                                  const TargetPhrase &target,
+                                  const Word *sourceLHS)
+{
+  return GetOrCreateNode(source, target, sourceLHS)
+         .GetOrCreateTargetPhraseCollection(target);
+}
+
+/** Walk the trie from the root along the symbols of \p source, creating
+ *  missing nodes on the way, and return the node reached at the end.
+ *
+ *  Terminal symbols follow (or create) a terminal child keyed by the source
+ *  word. Non-terminal symbols follow (or create) a non-terminal child keyed
+ *  by the aligned target-side word, taken from the target's non-terminal
+ *  alignments; these are consumed in order and asserted to line up with the
+ *  source positions. The source LHS argument is not used.
+ */
+UTrieNode &RuleTableUTrie::GetOrCreateNode(const Phrase &source,
+    const TargetPhrase &target,
+    const Word */*sourceLHS*/)
+{
+  const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
+  AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
+
+  UTrieNode *currNode = &m_root;
+  const size_t numSymbols = source.GetSize();
+
+  for (size_t pos = 0; pos != numSymbols; ++pos) {
+    const Word &symbol = source.GetWord(pos);
+
+    if (!symbol.IsNonTerminal()) {
+      currNode = currNode->GetOrCreateTerminalChild(symbol);
+    } else {
+      // The next unused non-terminal alignment must belong to this position.
+      assert(iterAlign != alignmentInfo.end());
+      assert(iterAlign->first == pos);
+      const Word &targetLabel = target.GetWord(iterAlign->second);
+      ++iterAlign;
+      currNode = currNode->GetOrCreateNonTerminalChild(targetLabel);
+    }
+
+    assert(currNode != NULL);
+  }
+
+  return *currNode;
+}
+
+/** Create the rule lookup manager for this table: a newly allocated
+ *  Scope3Parser built from the parser, the cell collection, this table and
+ *  the maximum chart span.
+ */
+ChartRuleLookupManager *RuleTableUTrie::CreateRuleLookupManager(
+  const ChartParser &parser,
+  const ChartCellCollectionBase &cellCollection,
+  std::size_t maxChartSpan)
+{
+  ChartRuleLookupManager *manager =
+    new Scope3Parser(parser, cellCollection, *this, maxChartSpan);
+  return manager;
+}
+
+/** Sort the trie, starting at the root, using the table limit.
+ *  Nothing is done when the table limit is zero.
+ */
+void RuleTableUTrie::SortAndPrune()
+{
+  if (!GetTableLimit()) {
+    return;
+  }
+
+  m_root.Sort(GetTableLimit());
+}
+
+}  // namespace Moses
diff --git a/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.h b/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.h
new file mode 100644
index 0000000000000000000000000000000000000000..708bf866e2b9efc4dbbd9ad87f5d9fc25ebc8d41
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/RuleTable/UTrie.h
@@ -0,0 +1,73 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2012 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include "Trie.h"
+#include "UTrieNode.h"
+#include "moses/TargetPhraseCollection.h"
+
+namespace Moses
+{
+
+class Phrase;
+class TargetPhrase;
+class Word;
+class ChartParser;
+
+/** Implementation of RuleTableTrie.  A RuleTableUTrie is designed to store
+ * string-to-tree SCFG grammars only (i.e. rules can have distinct labels on
+ * the target side, but only a generic non-terminal on the source side).
+ * A key is the source RHS (one symbol per edge) of a rule and a mapped value
+ * is the collection of grammar rules that share the same source RHS.
+ *
+ * (The 'U' in UTrie stands for 'unlabelled' -- the keys are unlabelled and
+ * the target labels are stored on the node values, as opposed to the grammar
+ * being a monolingual projection with target labels projected onto the source
+ * side.)
+ */
+class RuleTableUTrie : public RuleTableTrie
+{
+public:
+  // Forwards the configuration line to RuleTableTrie.
+  RuleTableUTrie(const std::string &line)
+    : RuleTableTrie(line) {
+  }
+
+  // Read-only access to the root of the trie.
+  const UTrieNode &GetRootNode() const {
+    return m_root;
+  }
+
+  // Declared here, defined out of line. Parameters are, in order, the chart
+  // parser, the chart cell collection and a std::size_t.
+  ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &,
+      const ChartCellCollectionBase &, std::size_t);
+
+private:
+  // Same signature as the pure virtual declared in RuleTableTrie; defined out
+  // of line.
+  TargetPhraseCollection::shared_ptr
+  GetOrCreateTargetPhraseCollection(const Phrase &source,
+                                    const TargetPhrase &target,
+                                    const Word *sourceLHS);
+
+  // Helper returning a trie node for a source/target pair; defined out of
+  // line.
+  UTrieNode &GetOrCreateNode(const Phrase &source, const TargetPhrase &target,
+                             const Word *sourceLHS);
+
+  // Same signature as the pure virtual declared in RuleTableTrie; defined out
+  // of line.
+  void SortAndPrune();
+
+  // Root node of the trie, stored by value.
+  UTrieNode m_root;
+};
+
+}  // namespace Moses
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/Alignments.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/Alignments.h
new file mode 100644
index 0000000000000000000000000000000000000000..f54372d27b020f6e36db0ebe6921d30f827fc575
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/Alignments.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <map>
+
+// Holder for word alignments in both directions.
+// NOTE(review): the constructor is defined elsewhere. Presumably it parses the
+// `align` string and sizes the two vectors from sourceSize / targetSize --
+// confirm against the implementation.
+class Alignments
+{
+public:
+  // Two vectors of int->int maps. Judging by the names these are the
+  // source-to-target and target-to-source views of the same alignment;
+  // the exact key/value meaning is not visible here -- TODO confirm.
+  std::vector< std::map<int, int> > m_alignS2T, m_alignT2S;
+
+  Alignments(const std::string &align, size_t sourceSize, size_t targetSize);
+
+
+protected:
+  // (no protected members)
+
+};
+
+
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..89287ca9127418fdcabd665abaee9b61977a76f5
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
@@ -0,0 +1,1029 @@
+//
+//  FuzzyMatchWrapper.cpp
+//  moses
+//
+//  Created by Hieu Hoang on 26/07/2012.
+//  Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "FuzzyMatchWrapper.h"
+#include "SentenceAlignment.h"
+#include "Match.h"
+#include "create_xml.h"
+#include "moses/Util.h"
+#include "moses/StaticData.h"
+#include "util/file.hh"
+
+using namespace std;
+
+namespace tmmt
+{
+
+/** Set the matching flags and limits to their fixed defaults, then load the
+ *  data: a suffix array is built from \p sourcePath, after which the target
+ *  side (\p targetPath) and the alignments (\p alignmentPath) are loaded into
+ *  targetAndAlignment. Progress messages are written to stderr.
+ */
+FuzzyMatchWrapper::FuzzyMatchWrapper(const std::string &sourcePath, const std::string &targetPath, const std::string &alignmentPath)
+  :basic_flag(false)
+  ,lsed_flag(true)
+  ,refined_flag(true)
+  ,length_filter_flag(true)
+  ,parse_flag(true)
+  ,min_match(70)
+  ,multiple_flag(true)
+  ,multiple_slack(0)
+  ,multiple_max(100)
+{
+  // The source corpus is held by the suffix array.
+  std::cerr << "creating suffix array" << std::endl;
+  suffixArray = new tmmt::SuffixArray( sourcePath );
+
+  std::cerr << "loading target data" << std::endl;
+  load_target(targetPath, targetAndAlignment);
+
+  std::cerr << "loading alignment" << std::endl;
+  load_alignment(alignmentPath, targetAndAlignment);
+
+  std::cerr << "loading completed" << std::endl;
+}
+
+// Run `cmd` through system() and report a non-zero status on stderr.
+// The status used to be discarded, so a failing sort or training step went
+// unnoticed until the resulting file was missing. A failure is only reported,
+// not treated as fatal, to keep the previous best-effort behaviour.
+static void RunShellCommand(const std::string &cmd)
+{
+  const int status = system(cmd.c_str());
+  if (status != 0) {
+    cerr << "WARNING: command returned status " << status << ": " << cmd << endl;
+  }
+}
+
+/** Build a phrase table from the fuzzy matches of one input sentence.
+ *
+ *  Steps:
+ *   1. ExtractTM() writes the fuzzy-match file under \p dirNameStr.
+ *   2. create_xml() turns it into the extract files.
+ *   3. The .extract and .extract.inv files are sorted and gzipped.
+ *   4. train-model.perl (step 6 only, hierarchical, with fixed "-f en -e fr"
+ *      language flags) scores them into a phrase table.
+ *
+ *  NOTE(review): file names are pasted into the shell commands unquoted, so a
+ *  directory name containing spaces or shell metacharacters will not work.
+ *
+ *  @param translationId id of the sentence being processed, passed to ExtractTM()
+ *  @param dirNameStr    working directory for all generated files
+ *  @return path of the gzipped phrase table (<fuzzyMatchFile>.pt.gz)
+ */
+string FuzzyMatchWrapper::Extract(long translationId, const string &dirNameStr)
+{
+  const Moses::StaticData &staticData = Moses::StaticData::Instance();
+
+  WordIndex wordIndex;
+
+  string fuzzyMatchFile = ExtractTM(wordIndex, translationId, dirNameStr);
+
+  // create extract files
+  create_xml(fuzzyMatchFile);
+
+  // create phrase table with usual Moses scoring and consolidate programs
+  string cmd;
+  cmd = "LC_ALL=C sort " + fuzzyMatchFile + ".extract | gzip -c > "
+        + fuzzyMatchFile + ".extract.sorted.gz";
+  RunShellCommand(cmd);
+  cmd = "LC_ALL=C sort " + fuzzyMatchFile + ".extract.inv | gzip -c > "
+        + fuzzyMatchFile + ".extract.inv.sorted.gz";
+  RunShellCommand(cmd);
+
+#ifdef IS_XCODE
+  cmd = "/Users/hieuhoang/unison/workspace/github/moses-smt/bin";
+#elif IS_ECLIPSE
+  cmd = "/home/hieu/workspace/github/moses-smt/bin";
+#else
+  cmd = staticData.GetBinDirectory();
+#endif
+
+  cmd += string("/../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical ")
+         + " -extract-file " + fuzzyMatchFile + ".extract -lexical-file - -score-options \"--NoLex\" "
+         + " -phrase-translation-table " + fuzzyMatchFile + ".pt";
+  RunShellCommand(cmd);
+
+
+  return fuzzyMatchFile + ".pt.gz";
+}
+
+string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, const string &dirNameStr)
+{
+  const std::vector< std::vector< WORD_ID > > &source = suffixArray->GetCorpus();
+
+  string inputPath = dirNameStr + "/in";
+  string fuzzyMatchFile = dirNameStr + "/fuzzyMatchFile";
+  ofstream fuzzyMatchStream(fuzzyMatchFile.c_str());
+
+  vector< vector< WORD_ID > > input;
+  load_corpus(inputPath, input);
+
+  assert(input.size() == 1);
+  size_t sentenceInd = 0;
+
+  clock_t start_clock = clock();
+  // if (i % 10 == 0) cerr << ".";
+
+  // establish some basic statistics
+
+  // int input_length = compute_length( input[i] );
+  int input_length = input[sentenceInd].size();
+  int best_cost = input_length * (100-min_match) / 100 + 1;
+
+  int match_count = 0; // how many substring matches to be considered
+  //cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
+
+  // find match ranges in suffix array
+  vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
+  for(int start=0; start<input[sentenceInd].size(); start++) {
+    SuffixArray::INDEX prior_first_match = 0;
+    SuffixArray::INDEX prior_last_match = suffixArray->GetSize()-1;
+    vector< string > substring;
+    bool stillMatched = true;
+    vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
+    //cerr << "start: " << start;
+    for(size_t word=start; stillMatched && word<input[sentenceInd].size(); word++) {
+      substring.push_back( GetVocabulary().GetWord( input[sentenceInd][word] ) );
+
+      // only look up, if needed (i.e. no unnecessary short gram lookups)
+      //				if (! word-start+1 <= short_match_max_length( input_length ) )
+      //			{
+      SuffixArray::INDEX first_match, last_match;
+      stillMatched = false;
+      if (suffixArray->FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) ) {
+        stillMatched = true;
+        matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
+        //cerr << " (" << first_match << "," << last_match << ")";
+        //cerr << " " << ( last_match - first_match + 1 );
+        prior_first_match = first_match;
+        prior_last_match = last_match;
+      }
+      //}
+    }
+    //cerr << endl;
+    match_range.push_back( matchedAtThisStart );
+  }
+
+  clock_t clock_range = clock();
+
+  map< int, vector< Match > > sentence_match;
+  map< int, int > sentence_match_word_count;
+
+  // go through all matches, longest first
+  for(int length = input[sentenceInd].size(); length >= 1; length--) {
+    // do not create matches, if these are handled by the short match function
+    if (length <= short_match_max_length( input_length ) ) {
+      continue;
+    }
+
+    unsigned int count = 0;
+    for(int start = 0; start <= input[sentenceInd].size() - length; start++) {
+      if (match_range[start].size() >= length) {
+        pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
+        // cerr << " (" << range.first << "," << range.second << ")";
+        count += range.second - range.first + 1;
+
+        for(SuffixArray::INDEX i=range.first; i<=range.second; i++) {
+          size_t position = suffixArray->GetPosition( i );
+
+          // sentence length mismatch
+          size_t sentence_id = suffixArray->GetSentence( position );
+          int sentence_length = suffixArray->GetSentenceLength( sentence_id );
+          int diff = abs( (int)sentence_length - (int)input_length );
+          // cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
+          //if (length <= 2 && input_length>=5 &&
+          //		sentence_match.find( sentence_id ) == sentence_match.end())
+          //	continue;
+
+          if (diff > best_cost)
+            continue;
+
+          // compute minimal cost
+          int start_pos = suffixArray->GetWordInSentence( position );
+          int end_pos = start_pos + length-1;
+          // cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
+          // << start << "-" << (start+length-1) << " (" << input_length << ")";
+          // different number of prior words -> cost is at least diff
+          int min_cost = abs( start - start_pos );
+
+          // same number of words, but not sent. start -> cost is at least 1
+          if (start == start_pos && start>0)
+            min_cost++;
+
+          // different number of remaining words -> cost is at least diff
+          min_cost += abs( ( sentence_length-1 - end_pos ) -
+                           ( input_length-1 - (start+length-1) ) );
+
+          // same number of words, but not sent. end -> cost is at least 1
+          if ( sentence_length-1 - end_pos ==
+               input_length-1 - (start+length-1)
+               && end_pos != sentence_length-1 )
+            min_cost++;
+
+          // cerr << " -> min_cost " << min_cost;
+          if (min_cost > best_cost)
+            continue;
+
+          // valid match
+          match_count++;
+
+          // compute maximal cost
+          int max_cost = max( start, start_pos )
+                         + max( sentence_length-1 - end_pos,
+                                input_length-1 - (start+length-1) );
+          // cerr << ", max_cost " << max_cost;
+
+          Match m = Match( start, start+length-1,
+                           start_pos, start_pos+length-1,
+                           min_cost, max_cost, 0);
+          sentence_match[ sentence_id ].push_back( m );
+          sentence_match_word_count[ sentence_id ] += length;
+
+          if (max_cost < best_cost) {
+            best_cost = max_cost;
+            if (best_cost == 0) break;
+          }
+          //if (match_count >= MAX_MATCH_COUNT) break;
+        }
+      }
+      // cerr << endl;
+      if (best_cost == 0) break;
+      //if (match_count >= MAX_MATCH_COUNT) break;
+    }
+    // cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
+
+    if (best_cost == 0) break;
+    //if (match_count >= MAX_MATCH_COUNT) break;
+  }
+  cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
+
+  clock_t clock_matches = clock();
+
+  // consider each sentence for which we have matches
+  int old_best_cost = best_cost;
+  int tm_count_word_match = 0;
+  int tm_count_word_match2 = 0;
+  int pruned_match_count = 0;
+  if (short_match_max_length( input_length )) {
+    init_short_matches(wordIndex, translationId, input[sentenceInd] );
+  }
+  vector< int > best_tm;
+  typedef map< int, vector< Match > >::iterator I;
+
+  clock_t clock_validation_sum = 0;
+
+  for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++) {
+    int tmID = tm->first;
+    int tm_length = suffixArray->GetSentenceLength(tmID);
+    vector< Match > &match = tm->second;
+    add_short_matches(wordIndex, translationId, match, source[tmID], input_length, best_cost );
+
+    //cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
+
+    // quick look: how many words are matched
+    int words_matched = 0;
+    for(size_t m=0; m<match.size(); m++) {
+
+      if (match[m].min_cost <= best_cost) // makes no difference
+        words_matched += match[m].input_end - match[m].input_start + 1;
+    }
+    if (max(input_length,tm_length) - words_matched > best_cost) {
+      if (length_filter_flag) continue;
+    }
+    tm_count_word_match++;
+
+    // prune, check again how many words are matched
+    vector< Match > pruned = prune_matches( match, best_cost );
+    words_matched = 0;
+    for(size_t p=0; p<pruned.size(); p++) {
+      words_matched += pruned[p].input_end - pruned[p].input_start + 1;
+    }
+    if (max(input_length,tm_length) - words_matched > best_cost) {
+      if (length_filter_flag) continue;
+    }
+    tm_count_word_match2++;
+
+    pruned_match_count += pruned.size();
+    int prior_best_cost = best_cost;
+    int cost;
+
+    clock_t clock_validation_start = clock();
+    if (! parse_flag ||
+        pruned.size()>=10) { // to prevent worst cases
+      string path;
+      cost = sed( input[sentenceInd], source[tmID], path, false );
+      if (cost <  best_cost) {
+        best_cost = cost;
+      }
+    }
+
+    else {
+      cost = parse_matches( pruned, input_length, tm_length, best_cost );
+      if (prior_best_cost != best_cost) {
+        best_tm.clear();
+      }
+    }
+    clock_validation_sum += clock() - clock_validation_start;
+    if (cost == best_cost) {
+      best_tm.push_back( tmID );
+    }
+  }
+  cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
+  cerr << "tm considered: " << sentence_match.size()
+       << " word-matched: " << tm_count_word_match
+       << " word-matched2: " << tm_count_word_match2
+       << " best: " << best_tm.size() << endl;
+
+  cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
+
+  // create xml and extract files
+  string inputStr, sourceStr;
+  for (size_t pos = 0; pos < input_length; ++pos) {
+    inputStr += GetVocabulary().GetWord(input[sentenceInd][pos]) + " ";
+  }
+
+  // do not try to find the best ... report multiple matches
+  if (multiple_flag) {
+    for(size_t si=0; si<best_tm.size(); si++) {
+      int s = best_tm[si];
+      string path;
+      sed( input[sentenceInd], source[s], path, true );
+      const vector<WORD_ID> &sourceSentence = source[s];
+      vector<SentenceAlignment> &targets = targetAndAlignment[s];
+      create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path, fuzzyMatchStream);
+
+    }
+  } // if (multiple_flag)
+  else {
+
+    // find the best matches according to letter sed
+    string best_path = "";
+    int best_match = -1;
+    unsigned int best_letter_cost;
+    if (lsed_flag) {
+      best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
+      for(size_t si=0; si<best_tm.size(); si++) {
+        int s = best_tm[si];
+        string path;
+        unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
+        if (letter_cost < best_letter_cost) {
+          best_letter_cost = letter_cost;
+          best_path = path;
+          best_match = s;
+        }
+      }
+    }
+    // if letter sed turned off, just compute path for first match
+    else {
+      if (best_tm.size() > 0) {
+        string path;
+        sed( input[sentenceInd], source[best_tm[0]], path, false );
+        best_path = path;
+        best_match = best_tm[0];
+      }
+    }
+    cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
+         << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
+         << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
+         << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
+         << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
+         << " )" << endl;
+    if (lsed_flag) {
+      //cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
+    }
+    //cout << best_cost <<"/" << input_length;
+    if (lsed_flag) {
+      //cout << ")";
+    }
+    //cout << " ||| " << best_match << " ||| " << best_path << endl;
+
+    if (best_match == -1) {
+      UTIL_THROW_IF2(source.size() == 0, "Empty source phrase");
+      best_match = 0;
+    }
+
+    // create xml & extracts
+    const vector<WORD_ID> &sourceSentence = source[best_match];
+    vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
+    create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path, fuzzyMatchStream);
+
+  } // else if (multiple_flag)
+
+  fuzzyMatchStream.close();
+
+  return fuzzyMatchFile;
+}
+
+/* Read a corpus file: each line is tokenized through the vocabulary and
+   appended to corpus as one sentence (vector of word ids).
+   Prints a message and calls exit(1) if the file cannot be opened. */
+void FuzzyMatchWrapper::load_corpus( const std::string &fileName, vector< vector< WORD_ID > > &corpus )
+{
+  ifstream in( fileName.c_str() );
+  if (in.fail()) {
+    cerr << "file not found: " << fileName << endl;
+    exit(1);
+  }
+  cerr << "loading " << fileName << endl;
+
+  // one sentence per line
+  for (string sentence; getline(in, sentence); ) {
+    corpus.push_back( GetVocabulary().Tokenize( sentence.c_str() ) );
+  }
+}
+
+/* Load the target side of the translation memory.
+   Every line of fileName describes one or more target sentences:
+     <count> <word> <word> ... ||| <count> <word> ...
+   i.e. each sentence starts with its count and sentences are separated by
+   the token "|||". One vector<SentenceAlignment> is appended to corpus per
+   line; only the count and target fields are filled in here.
+   Prints a message and calls exit(1) if the file cannot be opened; throws
+   (UTIL_THROW_IF2) on a line that has no leading count or that ends right
+   after a "|||" delimiter. */
+void FuzzyMatchWrapper::load_target(const std::string &fileName, vector< vector< SentenceAlignment > > &corpus)
+{
+  ifstream fileStream;
+  fileStream.open(fileName.c_str());
+  if (!fileStream) {
+    cerr << "file not found: " << fileName << endl;
+    exit(1);
+  }
+  cerr << "loading " << fileName << endl;
+
+  const WORD_ID delimiter = GetVocabulary().StoreIfNew("|||");
+
+  size_t lineNum = 0; // 1-based, used for error messages only
+  string line;
+  while(getline(fileStream, line)) {
+    ++lineNum;
+    const vector<WORD_ID> toks = GetVocabulary().Tokenize( line.c_str() );
+
+    // the first token must be the count of the first sentence
+    UTIL_THROW_IF2(toks.empty(),
+                   "Empty line " << lineNum << " in target file " << fileName);
+
+    corpus.push_back(vector< SentenceAlignment >());
+    vector< SentenceAlignment > &vec = corpus.back();
+
+    vec.push_back(SentenceAlignment());
+    vec.back().count = atoi(GetVocabulary().GetWord(toks[0]).c_str());
+
+    for (size_t i = 1; i < toks.size(); ++i) {
+      const WORD_ID wordId = toks[i];
+
+      if (wordId == delimiter) {
+        // target and alignments can have multiple sentences.
+        vec.push_back(SentenceAlignment());
+
+        // the token following the delimiter is the count of the new sentence
+        ++i;
+        UTIL_THROW_IF2(i >= toks.size(),
+                       "Missing count after ||| on line " << lineNum
+                       << " of target file " << fileName);
+        vec.back().count = atoi(GetVocabulary().GetWord(toks[i]).c_str());
+      } else {
+        // just a normal word, add it to the sentence currently being read
+        vec.back().target.push_back(wordId);
+      }
+    }
+  }
+}
+
+
+/* Load word alignments into an already populated corpus.
+   Line n of fileName belongs to corpus[n]. A line is a sequence of
+   alignment points "<int>-<int>"; the token "|||" switches to the next
+   SentenceAlignment of that corpus entry, and the token directly after
+   "|||" is skipped (presumably the count, as in the target file — confirm
+   against the data format).
+   Prints a message and calls exit(1) if the file cannot be opened; throws
+   (UTIL_THROW_IF2) when the file refers to a line or sentence that does not
+   exist in corpus, or when an alignment point is malformed. */
+void FuzzyMatchWrapper::load_alignment(const std::string &fileName, vector< vector< SentenceAlignment > > &corpus )
+{
+  ifstream fileStream;
+  fileStream.open(fileName.c_str());
+  if (!fileStream) {
+    cerr << "file not found: " << fileName << endl;
+    exit(1);
+  }
+  cerr << "loading " << fileName << endl;
+
+  const string delimiter = "|||";
+
+  size_t lineNum = 0;
+  string line;
+  while(getline(fileStream, line)) {
+    UTIL_THROW_IF2(lineNum >= corpus.size(),
+                   "Alignment file " << fileName << " has more lines than the "
+                   << corpus.size() << " loaded target lines");
+    vector< SentenceAlignment > &vec = corpus[lineNum];
+    size_t targetInd = 0;
+
+    const vector<string> toks = Moses::Tokenize(line);
+
+    for (size_t i = 0; i < toks.size(); ++i) {
+      const string &tok = toks[i];
+
+      if (tok == delimiter) {
+        // target and alignments can have multiple sentences.
+        ++targetInd;
+
+        // skip the token that follows the delimiter
+        ++i;
+      } else {
+        // just a normal alignment, add
+        UTIL_THROW_IF2(targetInd >= vec.size(),
+                       "Line " << (lineNum+1) << " of " << fileName
+                       << " has alignments for more sentences than the target side");
+        const vector<int> alignPoint = Moses::Tokenize<int>(tok, "-");
+        // checked at run time: an assert would vanish in NDEBUG builds
+        UTIL_THROW_IF2(alignPoint.size() != 2,
+                       "Malformed alignment point '" << tok << "' on line "
+                       << (lineNum+1) << " of " << fileName);
+        vec[targetInd].alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
+      }
+    }
+
+    ++lineNum;
+  }
+}
+
+/* Look up a word pair in the letter-edit-distance cache (m_lsed).
+   On a hit, stores the cached distance in value and returns true;
+   on a miss, leaves value untouched and returns false.
+   Holds a shared lock on m_accessLock when built WITH_THREADS. */
+bool FuzzyMatchWrapper::GetLSEDCache(const std::pair< WORD_ID, WORD_ID > &key, unsigned int &value) const
+{
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+  typedef map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator CacheIter;
+  const CacheIter hit = m_lsed.find( key );
+  if (hit == m_lsed.end()) {
+    return false;
+  }
+
+  value = hit->second;
+  return true;
+}
+
+/* Store (or overwrite) the letter edit distance for a word pair in m_lsed.
+   Holds a unique lock on m_accessLock when built WITH_THREADS. */
+void FuzzyMatchWrapper::SetLSEDCache(const std::pair< WORD_ID, WORD_ID > &key, const unsigned int &value)
+{
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
+  typedef map< pair< WORD_ID, WORD_ID >, unsigned int > Cache;
+  const pair< Cache::iterator, bool > slot = m_lsed.insert( Cache::value_type( key, value ) );
+  if (! slot.second) {
+    // key was already cached: replace the stored distance
+    slot.first->second = value;
+  }
+}
+
+/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2.
+   Computes the edit distance between the surface strings of two word ids,
+   with cost 1 for each inserted, deleted or substituted char.
+   Results are cached per (aIdx, bIdx) pair via GetLSEDCache/SetLSEDCache.
+   @param aIdx  vocabulary id of the first word
+   @param bIdx  vocabulary id of the second word
+   @return      edit distance between the two words, in chars */
+
+unsigned int FuzzyMatchWrapper::letter_sed( WORD_ID aIdx, WORD_ID bIdx )
+{
+  // check if already computed -> lookup in cache
+  const pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
+  unsigned int cached;
+  if (GetLSEDCache(pIdx, cached)) {
+    return cached;
+  }
+
+  // get surface strings for word indices
+  const string &a = GetVocabulary().GetWord( aIdx );
+  const string &b = GetVocabulary().GetWord( bIdx );
+
+  // Only the previous and the current row of the cost matrix are needed.
+  // vectors release their memory automatically, also on exceptions.
+  vector< unsigned int > prev( b.size()+1 );
+  vector< unsigned int > curr( b.size()+1 );
+  for( size_t j=0; j<=b.size(); j++ ) {
+    prev[j] = static_cast<unsigned int>( j );
+  }
+
+  // core string edit distance loop
+  for( size_t i=1; i<=a.size(); i++ ) {
+    curr[0] = static_cast<unsigned int>( i );
+    for( size_t j=1; j<=b.size(); j++ ) {
+      const unsigned int ins = prev[j] + 1;
+      const unsigned int del = curr[j-1] + 1;
+      const unsigned int diag = prev[j-1] + ((a[i-1] == b[j-1]) ? 0 : 1);
+
+      unsigned int best = (ins < del) ? ins : del;
+      if (diag < best) {
+        best = diag;
+      }
+      curr[j] = best;
+    }
+    // the row just filled becomes the "previous" row of the next iteration
+    prev.swap( curr );
+  }
+
+  // after the loop prev holds the last row (row 0 if a is empty)
+  const unsigned int distance = prev[ b.size() ];
+
+  // cache and return result
+  SetLSEDCache(pIdx, distance);
+  return distance;
+}
+
+/* string edit distance implementation
+   Computes the edit distance between word sequences a and b and the
+   corresponding edit path.
+   @param a               first word-id sequence
+   @param b               second word-id sequence
+   @param best_path       out: one char per edit step, in order from the
+                          sentence start: 'M' (match), 'S' (substitution),
+                          'I' (step that consumes a word of a only) or
+                          'D' (step that consumes a word of b only)
+   @param use_letter_sed  false: every word edit costs 1;
+                          true: dropping a word costs its length in chars and
+                          replacing a word costs letter_sed() of the pair
+   @return                total cost of the cheapest edit path */
+
+unsigned int FuzzyMatchWrapper::sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed )
+{
+  const size_t rows = a.size()+1;
+  const size_t cols = b.size()+1;
+
+  // cost of consuming a single word of a / b on its own; looked up once
+  // here instead of once per matrix cell
+  vector< unsigned int > aCost( a.size(), 1 );
+  vector< unsigned int > bCost( b.size(), 1 );
+  if (use_letter_sed) {
+    for( size_t i=0; i<a.size(); i++ ) {
+      aCost[i] = GetVocabulary().GetWord( a[i] ).size();
+    }
+    for( size_t j=0; j<b.size(); j++ ) {
+      bCost[j] = GetVocabulary().GetWord( b[j] ).size();
+    }
+  }
+
+  // cost and path matrices; vectors free themselves, also if letter_sed
+  // or the vocabulary lookup throws
+  vector< vector< unsigned int > > cost( rows, vector< unsigned int >( cols, 0 ) );
+  vector< vector< char > > path( rows, vector< char >( cols, 0 ) );
+
+  // first column: only words of a consumed
+  path[0][0] = 'I';
+  for( size_t i=1; i<rows; i++ ) {
+    cost[i][0] = cost[i-1][0] + aCost[i-1];
+    path[i][0] = 'I';
+  }
+
+  // first row: only words of b consumed
+  path[0][0] = 'D';
+  for( size_t j=1; j<cols; j++ ) {
+    cost[0][j] = cost[0][j-1] + bCost[j-1];
+    path[0][j] = 'D';
+  }
+
+  // core string edit distance algorithm
+  for( size_t i=1; i<rows; i++ ) {
+    for( size_t j=1; j<cols; j++ ) {
+      const unsigned int ins = cost[i-1][j] + aCost[i-1];
+      const unsigned int del = cost[i][j-1] + bCost[j-1];
+      unsigned int match;
+      if (use_letter_sed) {
+        match = letter_sed( a[i-1], b[j-1] );
+      } else {
+        match = ( a[i-1] == b[j-1] ) ? 0 : 1;
+      }
+      const unsigned int diag = cost[i-1][j-1] + match;
+
+      // ties: 'D' wins over 'I'; the diagonal is taken only if strictly cheaper
+      char action = (ins < del) ? 'I' : 'D';
+      unsigned int min = (ins < del) ? ins : del;
+      if (diag < min) {
+        action = (match>0) ? 'S' : 'M';
+        min = diag;
+      }
+
+      cost[i][j] = min;
+      path[i][j] = action;
+    }
+  }
+
+  // construct string for best path: walk back from the bottom-right cell,
+  // collecting the actions in reverse order, then flip once at the end
+  string reversed;
+  reversed.reserve( a.size() + b.size() );
+  size_t i = a.size();
+  size_t j = b.size();
+  while( i>0 || j>0 ) {
+    const char action = path[i][j];
+    reversed += action;
+    if (action == 'I') {
+      i--;
+    } else if (action == 'D') {
+      j--;
+    } else {
+      i--;
+      j--;
+    }
+  }
+  best_path.assign( reversed.rbegin(), reversed.rend() );
+
+  // return result
+  return cost[a.size()][b.size()];
+}
+
+/* utility function: compute length of sentence in characters
+ (spaces do not count): the sum of the surface-string sizes of all words */
+
+unsigned int FuzzyMatchWrapper::compute_length( const vector< WORD_ID > &sentence )
+{
+  typedef vector< WORD_ID >::const_iterator WordIter;
+
+  unsigned int total = 0;
+  for( WordIter word = sentence.begin(); word != sentence.end(); ++word ) {
+    total += GetVocabulary().GetWord( *word ).size();
+  }
+  return total;
+}
+
+/* brute force method: compare input to all corpus sentences
+   For every input sentence the word-level edit distance to each corpus
+   sentence is computed and the lowest cost / path is tracked locally.
+   The result is not stored or printed (the output statements are
+   commented out). */
+
+void FuzzyMatchWrapper::basic_fuzzy_match( vector< vector< WORD_ID > > source,
+    vector< vector< WORD_ID > > input )
+{
+  // letter-based distance is switched off in this method
+  const bool use_letter_sed = false;
+
+  // go through input set...
+  for(unsigned int i=0; i<input.size(); i++) {
+    const vector< WORD_ID > &query = input[i];
+
+    // compute sentence length and worst allowed cost
+    unsigned int input_length;
+    if (use_letter_sed)
+      input_length = compute_length( query );
+    else
+      input_length = query.size();
+
+    unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
+    string best_path = "";
+    //int best_match = -1;
+
+    // go through all corpus sentences
+    for(unsigned int s=0; s<source.size(); s++) {
+      const vector< WORD_ID > &candidate = source[s];
+
+      int source_length;
+      if (use_letter_sed)
+        source_length = compute_length( candidate );
+      else
+        source_length = candidate.size();
+
+      // length difference is a lower bound on the edit distance
+      int diff = abs((int)source_length - (int)input_length);
+      if (!length_filter_flag || !(diff >= best_cost)) {
+        // compute string edit distance
+        string path;
+        unsigned int cost = sed( query, candidate, path, use_letter_sed );
+
+        // update if new best
+        if (cost < best_cost) {
+          best_cost = cost;
+          best_path = path;
+          //best_match = s;
+        }
+      }
+    }
+    //cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
+  }
+}
+
+/* definition of short matches
+ very short n-gram matches (1-grams) will not be looked up in
+ the suffix array, since there are too many matches
+ and for longer sentences, at least one 2-gram match must occur.
+ Returns 1 when refined_flag is set and the input has at least 5 words,
+ otherwise 0. */
+
+int FuzzyMatchWrapper::short_match_max_length( int input_length )
+{
+  const bool short_matches_apply = refined_flag && input_length >= 5;
+  return short_matches_apply ? 1 : 0;
+}
+
+
+/* if we have non-short matches in a sentence, we need to
+ take a closer look at it.
+ this function rebuilds wordIndex as a map from each input word to the
+ list of its positions in the input (to be used by add_short_matches)
+ (done here, because this has to be done only once for an input sentence).
+ Does nothing when short matches are not in use for this input length.
+ translationId is not used. */
+
+void FuzzyMatchWrapper::init_short_matches(WordIndex &wordIndex, long translationId, const vector< WORD_ID > &input )
+{
+  if (short_match_max_length( input.size() ) == 0) {
+    return;
+  }
+
+  wordIndex.clear();
+
+  // store input words and their positions in the map;
+  // operator[] creates the empty position list on first sight of a word
+  for(size_t pos=0; pos<input.size(); ++pos) {
+    wordIndex[ input[pos] ].push_back( pos );
+  }
+}
+
+/* add all short matches to list of matches for a sentence
+   For every word of the tm sentence that also occurs in the input
+   (according to wordIndex, as filled by init_short_matches), a one-word
+   Match is appended to match, provided its lower cost bound does not
+   exceed best_cost.
+   @param wordIndex     input word -> positions of that word in the input
+   @param translationId not used
+   @param match         in/out: match list of this tm sentence
+   @param tm            the tm sentence (word ids)
+   @param input_length  number of words in the input sentence
+   @param best_cost     matches whose min_cost exceeds this are dropped */
+
+void FuzzyMatchWrapper::add_short_matches(WordIndex &wordIndex, long translationId, vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
+{
+  // nothing to do when short matches are not in use for this input length
+  int max_length = short_match_max_length( input_length );
+  if (max_length == 0)
+    return;
+
+  int tm_length = tm.size();
+  map< WORD_ID,vector< int > >::iterator input_word_hit;
+  for(int t_pos=0; t_pos<tm.size(); t_pos++) {
+    input_word_hit = wordIndex.find( tm[t_pos] );
+    if (input_word_hit != wordIndex.end()) {
+      // this tm word occurs in the input: pair it with every input position
+      vector< int > &position_vector = input_word_hit->second;
+      for(size_t j=0; j<position_vector.size(); j++) {
+        int &i_pos = position_vector[j];
+
+        // before match
+        int max_cost = max( i_pos , t_pos );
+        int min_cost = abs( i_pos - t_pos );
+        // same number of preceding words, but not at the sentence start:
+        // counted as at least one edit
+        if ( i_pos>0 && i_pos == t_pos )
+          min_cost++;
+
+        // after match
+        // NOTE(review): the long-match code elsewhere in this file derives the
+        // trailing max cost from (length-1 - end), while this uses
+        // (length - pos), i.e. one more — confirm whether that is intended.
+        max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
+        min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
+        // same number of trailing words, but not at the input end:
+        // counted as at least one edit
+        if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
+          min_cost++;
+
+        // keep only matches that can still reach the current best cost
+        if (min_cost <= best_cost) {
+          Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
+          match.push_back( new_match );
+        }
+      }
+    }
+  }
+}
+
+/* remove matches that are subsumed by a larger match
+   A match i counts as subsumed when some other match j is at least as long
+   (in input words) and shares either its start point (input_start and
+   tm_start) or its end point (input_end and tm_end) with i. Note that the
+   length test is "<=", so two equally long matches sharing a start or end
+   point remove each other.
+   @param match      all matches found for one tm sentence
+   @param best_cost  matches whose min_cost exceeds this are dropped as well
+   @return           the surviving matches, in reverse order of match */
+
+vector< Match > FuzzyMatchWrapper::prune_matches( const vector< Match > &match, int best_cost )
+{
+  //cerr << "\tpruning";
+  vector< Match > pruned;
+  for(int i=match.size()-1; i>=0; i--) {
+    //cerr << " (" << match[i].input_start << "," << match[i].input_end
+    //		 << " ; " << match[i].tm_start << "," << match[i].tm_end
+    //		 << " * " << match[i].min_cost << ")";
+
+    //if (match[i].min_cost > best_cost)
+    //	continue;
+
+    bool subsumed = false;
+    for(int j=match.size()-1; j>=0; j--) {
+      if (i!=j // do not compare match with itself
+          && ( match[i].input_end - match[i].input_start <=
+               match[j].input_end - match[j].input_start ) // i not longer than j
+          && ((match[i].input_start == match[j].input_start &&
+               match[i].tm_start    == match[j].tm_start	) ||
+              (match[i].input_end   == match[j].input_end &&
+               match[i].tm_end      == match[j].tm_end) ) ) {
+        subsumed = true;
+        break; // one subsuming match is enough, no need to scan the rest
+      }
+    }
+    if (! subsumed && match[i].min_cost <= best_cost) {
+      //cerr << "*";
+      pruned.push_back( match[i] );
+    }
+  }
+  //cerr << endl;
+  return pruned;
+}
+
+/* A* parsing method to compute string edit distance
+   Combines the given matches bottom-up into larger, non-overlapping,
+   monotone chains and tracks the lowest max_cost (upper bound on the edit
+   cost) reached.
+   @param match         matches between the input and one tm sentence
+   @param input_length  number of words in the input sentence
+   @param tm_length     number of words in the tm sentence
+   @param best_cost     in/out: combinations whose min_cost exceeds it are
+                        discarded; lowered when a combination has a smaller
+                        max_cost
+   @return  lowest max_cost among the matches and all stored combinations;
+            the single match's max_cost if there is exactly one match;
+            input_length+tm_length if there is none */
+
+int FuzzyMatchWrapper::parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
+{
+  // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
+
+  // trivial cases: nothing to combine
+  if (match.size() == 1)
+    return match[0].max_cost;
+  if (match.size() == 0)
+    return input_length+tm_length;
+
+  // start from the best upper bound of any single match
+  int this_best_cost = input_length + tm_length;
+  for(size_t i=0; i<match.size(); i++) {
+    this_best_cost = min( this_best_cost, match[i].max_cost );
+  }
+  // cerr << "\tthis best cost: " << this_best_cost << endl;
+
+  // bottom up combination of spans
+  // multi_match[k] holds the combinations built from k+1 original matches
+  vector< vector< Match > > multi_match;
+  multi_match.push_back( match );
+
+  int match_level = 1;
+  // stop as soon as a level produced no combination
+  while(multi_match[ match_level-1 ].size()>0) {
+    // init vector
+    vector< Match > empty;
+    multi_match.push_back( empty );
+
+    // pair up two lower levels whose indices add up to match_level-1
+    for(int first_level = 0; first_level <= (match_level-1)/2; first_level++) {
+      int second_level = match_level - first_level -1;
+      //cerr << "\tcombining level " << first_level << " and " << second_level << endl;
+
+      // both levels are below match_level, so pushing into
+      // multi_match[ match_level ] below does not touch these vectors
+      vector< Match > &first_match  = multi_match[ first_level ];
+      vector< Match > &second_match = multi_match[ second_level ];
+
+      for(size_t i1 = 0; i1 < first_match.size(); i1++) {
+        for(size_t i2 = 0; i2 < second_match.size(); i2++) {
+
+          // do not combine the same pair twice
+          if (first_level == second_level && i2 <= i1) {
+            continue;
+          }
+
+          // get sorted matches (first is before second)
+          Match *first, *second;
+          if (first_match[i1].input_start < second_match[i2].input_start ) {
+            first = &first_match[i1];
+            second = &second_match[i2];
+          } else {
+            second = &first_match[i1];
+            first = &second_match[i2];
+          }
+
+          //cerr << "\tcombining "
+          //		 << "(" << first->input_start << "," << first->input_end << "), "
+          //		 << first->tm_start << " [" << first->internal_cost << "]"
+          //		 << " with "
+          //		 << "(" << second->input_start << "," << second->input_end << "), "
+          //		 << second->tm_start<< " [" << second->internal_cost << "]"
+          //		 << endl;
+
+          // do not process overlapping matches
+          if (first->input_end >= second->input_start) {
+            continue;
+          }
+
+          // no overlap / mismatch in tm
+          if (first->tm_end >= second->tm_start) {
+            continue;
+          }
+
+          // compute cost
+          // min_cost: lower bound, max_cost: upper bound for the combination
+          int min_cost = 0;
+          int max_cost = 0;
+
+          // initial
+          min_cost += abs( first->input_start - first->tm_start );
+          max_cost += max( first->input_start, first->tm_start );
+
+          // same number of words, but not sent. start -> cost is at least 1
+          if (first->input_start == first->tm_start && first->input_start > 0) {
+            min_cost++;
+          }
+
+          // in-between
+          // the gap between the two spans costs the larger of the two gap
+          // sizes, on top of what each span already carries internally
+          int skipped_words = second->input_start - first->input_end -1;
+          int skipped_words_tm = second->tm_start - first->tm_end -1;
+          int internal_cost = max( skipped_words, skipped_words_tm );
+          internal_cost += first->internal_cost + second->internal_cost;
+          min_cost += internal_cost;
+          max_cost += internal_cost;
+
+          // final
+          min_cost += abs( (tm_length-1 - second->tm_end) -
+                           (input_length-1 - second->input_end) );
+          max_cost += max( (tm_length-1 - second->tm_end),
+                           (input_length-1 - second->input_end) );
+
+          // same number of words, but not sent. end -> cost is at least 1
+          if ( ( input_length-1 - second->input_end
+                 == tm_length-1 - second->tm_end )
+               && input_length-1 != second->input_end ) {
+            min_cost++;
+          }
+
+          // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
+
+          // if worse than best cost, forget it
+          if (min_cost > best_cost) {
+            continue;
+          }
+
+          // add match
+          Match new_match( first->input_start,
+                           second->input_end,
+                           first->tm_start,
+                           second->tm_end,
+                           min_cost,
+                           max_cost,
+                           internal_cost);
+          multi_match[ match_level ].push_back( new_match );
+          // cerr << "\tstored\n";
+
+          // possibly updating this_best_cost
+          if (max_cost < this_best_cost) {
+            // cerr << "\tupdating this best cost to " << max_cost << "\n";
+            this_best_cost = max_cost;
+
+            // possibly updating best_cost
+            if (max_cost < best_cost) {
+              // cerr << "\tupdating best cost to " << max_cost << "\n";
+              best_cost = max_cost;
+            }
+          }
+        }
+      }
+    }
+    match_level++;
+  }
+  return this_best_cost;
+}
+
+
+/* Write one record per target sentence to outputFile. A record consists of
+   eight lines: sentence index, cost, source sentence, input sentence,
+   target sentence, alignment, edit path and count. */
+void FuzzyMatchWrapper::create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string  &path, ofstream &outputFile)
+{
+  // surface form of the source sentence, every word followed by a space
+  string sourceStr;
+  for (vector< WORD_ID >::const_iterator word = sourceSentence.begin();
+       word != sourceSentence.end(); ++word) {
+    sourceStr += GetVocabulary().GetWord(*word) + " ";
+  }
+
+  for (vector<SentenceAlignment>::const_iterator target = targets.begin();
+       target != targets.end(); ++target) {
+    const string targetStr = target->getTargetString(GetVocabulary());
+    const string alignStr = target->getAlignmentString();
+
+    outputFile << sentenceInd << endl;
+    outputFile << cost << endl;
+    outputFile << sourceStr << endl;
+    outputFile << inputStr << endl;
+    outputFile << targetStr << endl;
+    outputFile << alignStr << endl;
+    outputFile << path << endl;
+    outputFile << target->count << endl;
+  }
+}
+
+} // namespace
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..da50b64b9d97b0415fa983d3e7a91592c5a02d65
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.h
@@ -0,0 +1,91 @@
+//
+//  FuzzyMatchWrapper.h
+//  moses
+//
+//  Created by Hieu Hoang on 26/07/2012.
+//  Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef moses_FuzzyMatchWrapper_h
+#define moses_FuzzyMatchWrapper_h
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#endif
+
+#include <fstream>
+#include <string>
+#include "SuffixArray.h"
+#include "Vocabulary.h"
+#include "Match.h"
+#include "moses/InputType.h"
+
+namespace tmmt
+{
+class Match;
+struct SentenceAlignment;
+
+/** Fuzzy matching of input sentences against a translation memory
+ *  (source corpus, target corpus and word alignments).
+ *  The public entry point is Extract(); everything else is implementation
+ *  detail of the matching algorithm.
+ */
+class FuzzyMatchWrapper
+{
+public:
+  /** @param source     path of the source-side corpus
+   *  @param target     path of the target-side file
+   *  @param alignment  path of the word alignment file
+   */
+  FuzzyMatchWrapper(const std::string &source, const std::string &target, const std::string &alignment);
+
+  // NOTE(review): defined outside this header; judging by the signature it
+  // returns a path/name for the given translation — confirm in the .cpp
+  std::string Extract(long translationId, const std::string &dirNameStr);
+
+protected:
+  // tm-mt
+  // per corpus line: the target sentences with their counts and alignments
+  std::vector< std::vector< tmmt::SentenceAlignment > > targetAndAlignment;
+  // suffix array over the source corpus; also provides the vocabulary
+  // NOTE(review): raw pointer and no destructor is declared here — confirm
+  // who owns and releases it
+  tmmt::SuffixArray *suffixArray;
+  int basic_flag;         // NOTE(review): no use visible here — presumably selects basic_fuzzy_match; confirm
+  int lsed_flag;          // choose among the best matches by letter edit distance
+  int refined_flag;       // enables the special handling of short (1-word) matches
+  int length_filter_flag; // skip candidates that cannot beat the best cost, based on lengths / matched words
+  int parse_flag;         // validate candidates with parse_matches instead of a full sed
+  int min_match;          // used as a percentage when deriving initial cost limits
+  int multiple_flag;      // report all best matches instead of a single one
+  int multiple_slack;     // NOTE(review): no use visible here — meaning to be confirmed
+  int multiple_max;       // NOTE(review): no use visible here — meaning to be confirmed
+
+  // word id -> positions of that word in the input sentence
+  typedef std::map< WORD_ID,std::vector< int > > WordIndex;
+
+  // global cache for word pairs
+  // (letter edit distance per pair, see letter_sed)
+  std::map< std::pair< WORD_ID, WORD_ID >, unsigned int > m_lsed;
+#ifdef WITH_THREADS
+  //reader-writer lock
+  // guards m_lsed in GetLSEDCache / SetLSEDCache
+  mutable boost::shared_mutex m_accessLock;
+#endif
+
+  // loaders: one corpus entry per line of the given file
+  void load_corpus( const std::string &fileName, std::vector< std::vector< tmmt::WORD_ID > > &corpus );
+  void load_target( const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus);
+  void load_alignment( const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus );
+
+  /** brute force method: compare input to all corpus sentences */
+  void basic_fuzzy_match( std::vector< std::vector< tmmt::WORD_ID > > source,
+                          std::vector< std::vector< tmmt::WORD_ID > > input ) ;
+
+  /** utility function: compute length of sentence in characters
+   (spaces do not count) */
+  unsigned int compute_length( const std::vector< tmmt::WORD_ID > &sentence );
+  /** edit distance between the surface strings of two words (cached) */
+  unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx );
+  /** edit distance between two word sequences; best_path receives the edit path */
+  unsigned int sed( const std::vector< WORD_ID > &a, const std::vector< WORD_ID > &b, std::string &best_path, bool use_letter_sed );
+  /** fills wordIndex with the positions of every input word */
+  void init_short_matches(WordIndex &wordIndex, long translationId, const std::vector< WORD_ID > &input );
+  /** maximum length of matches treated as "short" (0 = none) */
+  int short_match_max_length( int input_length );
+  /** appends one-word matches between the input and a tm sentence to match */
+  void add_short_matches(WordIndex &wordIndex, long translationId, std::vector< Match > &match, const std::vector< WORD_ID > &tm, int input_length, int best_cost );
+  /** drops matches subsumed by another match or too costly */
+  std::vector< Match > prune_matches( const std::vector< Match > &match, int best_cost );
+  /** combines matches into chains; may lower best_cost */
+  int parse_matches( std::vector< Match > &match, int input_length, int tm_length, int &best_cost );
+
+  /** writes one output record per target sentence */
+  void create_extract(int sentenceInd, int cost, const std::vector< WORD_ID > &sourceSentence, const std::vector<SentenceAlignment> &targets, const std::string &inputStr, const std::string  &path, std::ofstream &outputFile);
+
+  std::string ExtractTM(WordIndex &wordIndex, long translationId, const std::string &inputPath);
+  /** vocabulary shared with the suffix array */
+  Vocabulary &GetVocabulary() {
+    return suffixArray->GetVocabulary();
+  }
+
+  /** cache accessors for m_lsed; take m_accessLock when built WITH_THREADS */
+  bool GetLSEDCache(const std::pair< WORD_ID, WORD_ID > &key, unsigned int &value) const;
+  void SetLSEDCache(const std::pair< WORD_ID, WORD_ID > &key, const unsigned int &value);
+
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/Match.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/Match.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb702850f44fd7b4705d81ff86d2357945c4b36e
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/Match.h
@@ -0,0 +1,34 @@
+//
+//  Match.h
+//  fuzzy-match
+//
+//  Created by Hieu Hoang on 25/07/2012.
+//  Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_Match_h
+#define fuzzy_match_Match_h
+
+namespace tmmt
+{
+
+/* data structure for n-gram match between input and corpus:
+   a span in the input, the corresponding span in a tm sentence,
+   and cost bounds for an alignment that uses this match */
+
+class Match
+{
+public:
+  int input_start;    // first matched position in the input
+  int input_end;      // last matched position in the input
+  int tm_start;       // first matched position in the tm sentence
+  int tm_end;         // last matched position in the tm sentence
+  int min_cost;       // lower bound on the edit cost
+  int max_cost;       // upper bound on the edit cost
+  int internal_cost;  // cost accumulated inside the span
+
+  Match( int inputFirst, int inputLast, int tmFirst, int tmLast,
+         int lowerBound, int upperBound, int inner )
+    : input_start(inputFirst)
+    , input_end(inputLast)
+    , tm_start(tmFirst)
+    , tm_end(tmLast)
+    , min_cost(lowerBound)
+    , max_cost(upperBound)
+    , internal_cost(inner) {
+  }
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp b/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eabdd875365c7a75639feebc1919d95ec1dde0bd
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.cpp
@@ -0,0 +1,25 @@
+//
+//  SentenceAlignment.cpp
+//  moses
+//
+//  Created by Hieu Hoang on 26/07/2012.
+//  Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "util/string_stream.hh"
+#include "SentenceAlignment.h"
+
+namespace tmmt
+{
+// Surface form of the target sentence: the vocabulary string of every
+// target word id, each followed by a single space.
+std::string SentenceAlignment::getTargetString(const Vocabulary &vocab) const
+{
+  util::StringStream out;
+  for (std::vector< WORD_ID >::const_iterator id = target.begin();
+       id != target.end(); ++id) {
+    out << vocab.GetWord(*id) << " ";
+  }
+  return out.str();
+}
+
+}
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d6dc430ccfc25481ef6b1bfff4b1e7dbbdea51f
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/SentenceAlignment.h
@@ -0,0 +1,43 @@
+//
+//  SentenceAlignment.h
+//  fuzzy-match
+//
+//  Created by Hieu Hoang on 25/07/2012.
+//  Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_SentenceAlignment_h
+#define fuzzy_match_SentenceAlignment_h
+
+#include <sstream>
+#include <vector>
+#include "Vocabulary.h"
+#include "util/string_stream.hh"
+
+namespace tmmt
+{
+
+/**
+ * One target sentence together with its word alignment.
+ *
+ *  - count:     integer counter; starts at 0 (its meaning is defined by the
+ *               code that fills this struct, which is not part of this file)
+ *  - target:    target-side words as vocabulary ids
+ *  - alignment: pairs of word positions, printed as "first-second"
+ */
+struct SentenceAlignment {
+  int count;
+  std::vector< WORD_ID > target;
+  std::vector< std::pair<int,int> > alignment;
+
+  // Zero-initialise `count` so a default-constructed object never exposes an
+  // indeterminate value.
+  SentenceAlignment()
+    : count(0) {
+  }
+
+  // Target words looked up in `vocab`, each followed by a space.
+  std::string getTargetString(const Vocabulary &vocab) const;
+
+  // Alignment points formatted as "a-b", each followed by a space.
+  std::string getAlignmentString() const {
+    util::StringStream strme;
+    for (size_t i = 0; i < alignment.size(); ++i) {
+      const std::pair<int,int> &alignPair = alignment[i];
+      strme << alignPair.first << "-" << alignPair.second << " ";
+    }
+    return strme.str();
+  }
+
+};
+
+}
+
+#endif
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.cpp b/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2930147ab03dc313e0ba8c4ae4df209895799b85
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.cpp
@@ -0,0 +1,240 @@
+#include "SuffixArray.h"
+#include <string>
+#include <stdlib.h>
+#include <cstring>
+
+using namespace std;
+
+namespace tmmt
+{
+
+/**
+ * Builds the suffix array for the corpus stored in `fileName` (one sentence
+ * per line, words separated by spaces or tabs).
+ *
+ * The file is read twice: the first pass counts words so the flat arrays can
+ * be allocated in one go, the second pass fills them. Every sentence is
+ * followed by the end-of-sentence marker in m_array. Finally the positions in
+ * m_index are sorted by the suffix that starts at each position.
+ *
+ * Progress messages go to stderr. An unreadable or empty file yields an empty
+ * array (m_size == 0) instead of sorting over an underflowed range.
+ */
+SuffixArray::SuffixArray( string fileName )
+{
+  m_vcb.StoreIfNew( "<uNk>" );
+  m_endOfSentence = m_vcb.StoreIfNew( "<s>" );
+
+  // pass 1: count the number of words
+  ifstream extractFile(fileName.c_str());
+  if (!extractFile) {
+    cerr << "SuffixArray: cannot open " << fileName << endl;
+  }
+  m_size = 0;
+  size_t sentenceCount = 0;
+  string line;
+  while(getline(extractFile, line)) {
+    m_size += m_vcb.Tokenize( line.c_str() ).size() + 1; // +1: sentence boundary
+    sentenceCount++;
+  }
+  extractFile.close();
+  cerr << m_size << " words (incl. sentence boundaries)" << endl;
+
+  // allocate memory; calloc takes the element count first, then the size
+  m_array = (WORD_ID*) calloc( m_size, sizeof( WORD_ID ) );
+  m_index = (INDEX*) calloc( m_size, sizeof( INDEX ) );
+  m_wordInSentence = (char*) calloc( m_size, sizeof( char ) );
+  m_sentence = (size_t*) calloc( m_size, sizeof( size_t ) );
+  m_sentenceLength = (char*) calloc( sentenceCount, sizeof( char ) );
+
+  // pass 2: fill the arrays
+  INDEX wordIndex = 0;
+  size_t sentenceId = 0;
+  // The first pass left eofbit/failbit set. Older (pre-C++11) library
+  // implementations do not reset them in open(), which would make this pass
+  // read nothing, so reset the state explicitly.
+  extractFile.clear();
+  extractFile.open(fileName.c_str());
+  while(getline(extractFile, line)) {
+    vector< WORD_ID > words = m_vcb.Tokenize( line.c_str() );
+
+    // add to corpus vector
+    corpus.push_back(words);
+
+    // record each word with its sentence id and its offset in the sentence
+    for (size_t w = 0; w < words.size(); ++w) {
+      m_index[ wordIndex ] = wordIndex;
+      m_sentence[ wordIndex ] = sentenceId;
+      m_wordInSentence[ wordIndex ] = w;
+      m_array[ wordIndex++ ] = words[w];
+    }
+    // sentence boundary marker
+    m_index[ wordIndex ] = wordIndex;
+    m_array[ wordIndex++ ] = m_endOfSentence;
+    m_sentenceLength[ sentenceId++ ] = words.size();
+  }
+  extractFile.close();
+  cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;
+
+  // sort; skipped for an empty corpus because m_size-1 would wrap around
+  // (INDEX is unsigned) and Sort would run over a bogus range
+  if (m_size > 0) {
+    m_buffer = (INDEX*) calloc( m_size, sizeof( INDEX ) );
+    Sort( 0, m_size-1 );
+    free( m_buffer );
+  }
+  m_buffer = NULL; // scratch space is gone; do not leave a dangling pointer
+  cerr << "done sorting" << endl;
+}
+
+// Recursive merge sort of m_index[start..end] (both bounds inclusive), ordered
+// by CompareIndex. m_buffer is used as scratch space for the merge and must
+// hold at least end-start+1 entries.
+void SuffixArray::Sort(INDEX start, INDEX end)
+{
+  if (start == end) return;
+
+  // sort both halves first
+  const INDEX split = (start+end+1)/2;
+  Sort( start, split-1 );
+  Sort( split, end );
+
+  // merge the halves into m_buffer
+  const size_t total = end-start+1;
+  size_t left = start;
+  size_t right = split;
+  for (size_t out = 0; out < total; ++out) {
+    bool takeLeft;
+    if (left == split) {
+      takeLeft = false;      // left half used up
+    } else if (right > end) {
+      takeLeft = true;       // right half used up
+    } else {
+      takeLeft = CompareIndex( m_index[left], m_index[right] ) < 0;
+    }
+
+    if (takeLeft) {
+      m_buffer[ out ] = m_index[ left++ ];
+    } else {
+      m_buffer[ out ] = m_index[ right++ ];
+    }
+  }
+
+  // copy the merged run back in place
+  memcpy( m_index + start, m_buffer, sizeof( INDEX ) * total );
+}
+
+// Releases every array the constructor allocated with calloc.
+// m_buffer is deliberately not freed here: the constructor already frees it
+// right after sorting.
+SuffixArray::~SuffixArray()
+{
+  free(m_index);
+  free(m_array);
+  free(m_wordInSentence);
+  free(m_sentence);
+  free(m_sentenceLength);
+}
+
+// Orders the suffixes of m_array that start at positions `a` and `b`.
+// Returns -1 if the suffix at `a` runs off the end of the array first, 1 if
+// the one at `b` does, otherwise the result of CompareWord on the first pair
+// of words that differ.
+int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
+{
+  INDEX posA = a;
+  INDEX posB = b;
+
+  // walk both suffixes in lockstep while the words are identical
+  while( posA < m_size && posB < m_size && m_array[ posA ] == m_array[ posB ] ) {
+    ++posA;
+    ++posB;
+  }
+
+  if( posA == m_size ) return -1;
+  if( posB == m_size ) return 1;
+  return CompareWord( m_array[ posA ], m_array[ posB ] );
+}
+
+// Compares two vocabulary ids by the strings they stand for (not by their
+// numeric value); the result is that of std::string::compare.
+inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
+{
+  const WORD &lhs = m_vcb.GetWord(a);
+  const WORD &rhs = m_vcb.GetWord(b);
+  return lhs.compare( rhs );
+}
+
+// Number of suffix-array entries that match `phrase`, searching the whole
+// array. The first/last match positions are computed but discarded.
+int SuffixArray::Count( const vector< WORD > &phrase )
+{
+  INDEX ignored;
+  const int occurrences = LimitedCount( phrase, m_size, ignored, ignored, 0, m_size-1 );
+  return occurrences;
+}
+
+// True when LimitedCount reports at least `min` matches for `phrase` over the
+// whole array.
+bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
+{
+  INDEX ignored;
+  const int found = LimitedCount( phrase, min, ignored, ignored, 0, m_size-1 );
+  return found >= min;
+}
+
+// True when `phrase` matches at least one entry; uses the min == 1 shortcut of
+// LimitedCount, which stops after the first hit.
+bool SuffixArray::Exists( const vector< WORD > &phrase )
+{
+  INDEX ignored;
+  const int found = LimitedCount( phrase, 1, ignored, ignored, 0, m_size-1 );
+  return found == 1;
+}
+
+// Counts the matches of `phrase` inside [search_start, search_end] of the
+// sorted index and reports the first and last matching index through
+// `firstMatch` / `lastMatch` (these are only written when a match exists).
+int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+  const int matches = LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
+  return matches;
+}
+
+// Core lookup behind Count / MinCount / Exists / FindMatches.
+//
+// Searches the sorted index between search_start and search_end (a
+// search_end of -1 means "up to the last entry") for `phrase`.
+// Returns 0 when nothing matches and 1 straight away when min == 1
+// (existence check only; firstMatch/lastMatch are left untouched in both
+// cases). Otherwise the block of matching entries is located, its bounds are
+// stored in firstMatch/lastMatch and its size is returned.
+int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+  INDEX lo = search_start;
+  INDEX hi = search_end;
+  if (search_end == static_cast<INDEX>(-1)) {
+    hi = m_size-1;
+  }
+
+  // FindFirst narrows lo/hi while it searches; the narrowed bounds are
+  // reused below
+  const INDEX hit = FindFirst( phrase, lo, hi );
+  if (hit == m_size) return 0; // no matches
+  if (min == 1) return 1;      // only existence check
+
+  // The statement order matters: callers may pass the same variable for
+  // firstMatch and lastMatch.
+  firstMatch = FindLast( phrase, hit, lo, -1 );
+  int total = 1 + (hit - firstMatch);
+
+  lastMatch = FindLast( phrase, hit, hi, 1 );
+  total += lastMatch - hit;
+
+  return total;
+}
+
+// Binary search for the outermost matching entry, walking away from a known
+// match. `start` must be an index that matches `phrase`; `end` is the search
+// limit on the side given by `direction` (+1 = towards higher indexes,
+// -1 = towards lower ones). Returns the index that matches while its
+// neighbour in `direction` does not.
+// NOTE(review): INDEX is unsigned, so with direction == -1 the arithmetic on
+// `end` and `mid + direction` relies on wrap-around; Match() may therefore be
+// asked about an index outside the array - confirm it tolerates that.
+SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
+{
+  end += direction;
+  const INDEX roundUp = (direction > 0) ? 0 : 1;
+
+  for (;;) {
+    const INDEX mid = ( start + end + roundUp )/2;
+
+    const bool hitHere = ( Match( phrase, mid ) == 0 );
+    const bool hitNext = ( Match( phrase, mid+direction ) == 0 );
+
+    if (hitHere && !hitNext) return mid;
+
+    if (hitHere) {
+      start = mid; // mid point is a match, keep walking outwards
+    } else {
+      end = mid;
+    }
+  }
+}
+
+// Binary search for any entry in [start, end] that matches `phrase`.
+// Returns the index of a match, or m_size when there is none.
+// `start` and `end` are in/out parameters: they are narrowed during the
+// search and the caller sees the narrowed values.
+SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
+{
+  for (;;) {
+    const INDEX mid = ( start + end + 1 )/2;
+    const int cmp = Match( phrase, mid );
+
+    if (cmp == 0) return mid;
+    if (start >= end) return m_size; // range exhausted without a match
+
+    if (cmp > 0) {
+      start = mid+1;
+    } else {
+      end = mid-1;
+    }
+  }
+}
+
+// Compares `phrase` with the suffix referenced by entry `index` of the sorted
+// index.
+// Returns 0 when every compared word is equal, otherwise the CompareWord
+// result for the first differing pair (phrase word vs. corpus word). Phrase
+// words unknown to the vocabulary are looked up as id 0. If the suffix is
+// shorter than the phrase, only the words that exist are compared.
+//
+// An `index` outside the array yields -1 (a non-match) instead of reading
+// past m_index: FindFirst and FindLast probe one step beyond their range
+// (index m_size, or a wrapped-around -1), and both only need a non-zero
+// answer there.
+int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
+{
+  if (index >= m_size) {
+    return -1;
+  }
+
+  INDEX pos = m_index[ index ];
+  for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++) {
+    int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
+    if (match != 0)
+      return match;
+  }
+  return 0;
+}
+
+// Debug helper that walks entries start..end (inclusive) of the sorted index
+// and up to five words of each suffix. All printing is currently disabled, so
+// the function has no visible effect.
+void SuffixArray::List(INDEX start, INDEX end)
+{
+  INDEX row = start;
+  while (row <= end) {
+    const INDEX pos = m_index[ row ];
+    for(int shown=0; shown<5 && shown+pos<m_size; ++shown) {
+      // disabled: cout << " " << m_vcb.GetWord( m_array[ pos+shown ] );
+    }
+    ++row;
+  }
+}
+
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2dbf892c11595219213ed3460cc758bf0cd906e
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/SuffixArray.h
@@ -0,0 +1,69 @@
+#include "Vocabulary.h"
+
+#pragma once
+
+#define LINE_MAX_LENGTH 10000
+
+namespace tmmt
+{
+
+/**
+ * Suffix array over a tokenized text corpus (one sentence per line).
+ *
+ * The corpus is stored as one flat array of word ids in which every sentence
+ * is followed by an end-of-sentence marker; m_index holds the positions of
+ * that array sorted by the suffix starting at each position.
+ *
+ * NOTE(review): the class owns raw calloc'ed buffers but relies on the
+ * compiler-generated copy constructor and assignment; copying an instance
+ * would free the same buffers twice. Pass it by reference or pointer.
+ */
+class SuffixArray
+{
+public:
+  typedef unsigned int INDEX;
+
+private:
+  std::vector< std::vector< WORD_ID > > corpus; // tokenized sentences in file order
+
+  WORD_ID *m_array;        // flat corpus: words plus one boundary marker per sentence
+  INDEX *m_index;          // positions into m_array, sorted by suffix
+  INDEX *m_buffer;         // scratch space for Sort(); only valid while sorting
+  char *m_wordInSentence;  // per position: offset of the word in its sentence (stored as char)
+  size_t *m_sentence;      // per position: id of the sentence the word belongs to
+  char *m_sentenceLength;  // per sentence: number of words (stored as char)
+  WORD_ID m_endOfSentence; // id of the boundary marker
+  Vocabulary m_vcb;
+  INDEX m_size;            // number of entries in m_array / m_index
+
+public:
+  SuffixArray( std::string fileName );
+  ~SuffixArray();
+
+  void Sort(INDEX start, INDEX end);
+  int CompareIndex( INDEX a, INDEX b ) const;
+  inline int CompareWord( WORD_ID a, WORD_ID b ) const;
+  int Count( const std::vector< WORD > &phrase );
+  bool MinCount( const std::vector< WORD > &phrase, INDEX min );
+  bool Exists( const std::vector< WORD > &phrase );
+  // search_end == -1 means "up to the last entry"
+  int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
+  // Defaults now mirror FindMatches: the implementation treats
+  // search_end == -1 as "up to the last entry", so the former defaults
+  // (search_start = -1, search_end = 0) described an empty, wrapped range.
+  int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
+  INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
+  INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
+  int Match( const std::vector< WORD > &phrase, INDEX index );
+  void List( INDEX start, INDEX end );
+
+  // corpus position stored at entry `index` of the sorted index
+  inline INDEX GetPosition( INDEX index ) {
+    return m_index[ index ];
+  }
+  // sentence id of the word at corpus position `position`
+  inline size_t GetSentence( INDEX position ) {
+    return m_sentence[position];
+  }
+  // offset within its sentence of the word at corpus position `position`
+  inline char GetWordInSentence( INDEX position ) {
+    return m_wordInSentence[position];
+  }
+  // number of words of sentence `sentenceId`
+  inline char GetSentenceLength( size_t sentenceId ) {
+    return m_sentenceLength[sentenceId];
+  }
+  // total number of corpus positions, boundary markers included
+  inline INDEX GetSize() {
+    return m_size;
+  }
+
+  Vocabulary &GetVocabulary() {
+    return m_vcb;
+  }
+  const std::vector< std::vector< WORD_ID > > &GetCorpus() const {
+    return corpus;
+  }
+};
+
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.cpp b/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b70eb98cab099790dbce73ff4060192fb5ae2e2f
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
@@ -0,0 +1,71 @@
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+#ifdef WITH_THREADS
+#include <boost/thread/locks.hpp>
+#endif
+
+using namespace std;
+
+namespace tmmt
+{
+
+// as in beamdecoder/tables.cpp
+// Splits the NUL-terminated string `input` at spaces and tabs and returns the
+// vocabulary id of every word, in order. Words not seen before are added to
+// the vocabulary through StoreIfNew. Runs of separators produce no empty
+// words.
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
+{
+  vector< WORD_ID > ids;
+  const char *cursor = input;
+
+  while (*cursor != '\0') {
+    // skip separators in front of the next word
+    while (*cursor == ' ' || *cursor == '\t') {
+      ++cursor;
+    }
+    if (*cursor == '\0') {
+      break;
+    }
+
+    // consume one word
+    const char *wordBegin = cursor;
+    while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t') {
+      ++cursor;
+    }
+    ids.push_back( StoreIfNew ( string( wordBegin, cursor - wordBegin ) ) );
+  }
+
+  return ids;
+}
+
+// Returns the id of `word`, adding it to the vocabulary first if necessary.
+// New words get the next free id, i.e. the current size of `vocab`.
+//
+// With WITH_THREADS the common case (word already known) runs under a shared
+// lock. A miss then takes the exclusive lock and checks again while holding
+// it: another thread may have stored the same word between the two locks,
+// and inserting blindly would give that word a second id and a duplicate
+// entry in `vocab`.
+WORD_ID Vocabulary::StoreIfNew( const WORD& word )
+{
+
+  {
+    // read-lock scope
+#ifdef WITH_THREADS
+    boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+    map<WORD, WORD_ID>::const_iterator i = lookup.find( word );
+
+    if( i != lookup.end() )
+      return i->second;
+  }
+
+#ifdef WITH_THREADS
+  boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
+  // insert() leaves an existing entry alone, so this one call both repeats
+  // the lookup under the exclusive lock and reserves the id if still missing
+  pair<map<WORD, WORD_ID>::iterator, bool> slot =
+    lookup.insert( make_pair( word, (WORD_ID) vocab.size() ) );
+  if( slot.second )
+    vocab.push_back( word );
+  return slot.first->second;
+}
+
+// Looks up the id of `word` without modifying the vocabulary.
+// Unknown words yield id 0.
+WORD_ID Vocabulary::GetWordID( const WORD &word )
+{
+#ifdef WITH_THREADS
+  boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+  map<WORD, WORD_ID>::iterator hit = lookup.find( word );
+  if( hit != lookup.end() ) {
+    return (WORD_ID) hit->second;
+  }
+  return 0;
+}
+
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.h
new file mode 100644
index 0000000000000000000000000000000000000000..f5245ebe3b72521fe6d3d65efb04eb5c53aa946a
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/Vocabulary.h
@@ -0,0 +1,46 @@
+// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
+
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include <cstdlib>
+#include <string>
+#include <queue>
+#include <map>
+#include <cmath>
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#endif
+
+namespace tmmt
+{
+typedef std::string WORD;
+typedef unsigned int WORD_ID;
+
+/**
+ * Two-way mapping between words and numeric ids.
+ *
+ * `lookup` maps a word to its id and `vocab` maps an id back to the word
+ * (the id is the position in `vocab`). When built with WITH_THREADS, the
+ * member functions defined in Vocabulary.cpp guard both containers with
+ * m_accessLock; GetWord() itself takes no lock.
+ */
+class Vocabulary
+{
+public:
+  std::map<WORD, WORD_ID> lookup; // word -> id
+  std::vector< WORD > vocab;      // id -> word
+
+  WORD_ID StoreIfNew( const WORD& );
+  WORD_ID GetWordID( const WORD& );
+  std::vector<WORD_ID> Tokenize( const char[] );
+
+  // Word stored under `id`; no range check is performed. The reference is
+  // non-const even though the function is const.
+  inline WORD &GetWord( WORD_ID id ) const {
+    return const_cast<WORD&>( vocab[ id ] );
+  }
+
+protected:
+#ifdef WITH_THREADS
+  //reader-writer lock
+  mutable boost::shared_mutex m_accessLock;
+#endif
+
+
+};
+
+}
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.cpp b/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a31b9b2861ee494b9416cdeeb9c0481896b8e07
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.cpp
@@ -0,0 +1,387 @@
+
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include <vector>
+#include <string>
+#include "moses/Util.h"
+#include "Alignments.h"
+
+using namespace std;
+using namespace Moses;
+
+// Returns a copy of `str` without leading and trailing characters that occur
+// in `dropChars` (whitespace by default). A string made up only of such
+// characters comes back empty.
+inline const std::string TrimInternal(const std::string& str, const std::string dropChars = " \t\n\r")
+{
+  const std::string::size_type lastKept = str.find_last_not_of(dropChars);
+  if (lastKept == std::string::npos) {
+    return std::string(); // nothing but drop characters (or empty input)
+  }
+  const std::string::size_type firstKept = str.find_first_not_of(dropChars);
+  return str.substr(firstKept, lastKept - firstKept + 1);
+}
+
+// Bundle of the strings produced by createXML() for one fuzzy match.
+class CreateXMLRetValues
+{
+public:
+  std::string frame;            // input framed with <xml> markup
+  std::string ruleS;            // source side of the rule
+  std::string ruleT;            // target side of the rule
+  std::string ruleAlignment;    // alignment points of the rule
+  std::string ruleAlignmentInv; // the same points with both sides swapped
+};
+
+CreateXMLRetValues createXML(int ruleCount, const string &source, const string &input, const string &target, const string &align, const string &path );
+
+/**
+ * Turns a fuzzy-match dump into rule files.
+ *
+ * `inPath` is read as a sequence of 8-line records:
+ *   0 sentence id, 1 score, 2 source, 3 input, 4 target, 5 alignment,
+ *   6 edit path, 7 count.
+ * For every complete record one line is appended to "<inPath>.extract" and
+ * one (with source and target swapped) to "<inPath>.extract.inv".
+ *
+ * The input line is expected to be the same in every record; this is only
+ * checked by an assert. A record that is cut off at the end of the file is
+ * dropped.
+ */
+void create_xml(const string &inPath)
+{
+  ifstream inStrme(inPath.c_str());
+  ofstream rule((inPath + ".extract").c_str());
+  ofstream ruleInv((inPath + ".extract.inv").c_str());
+
+  string source, target, align, path;
+  string input;           // input sentence, taken from the first record
+  bool haveInput = false;
+
+  int ruleCount = 1;
+  string inLine;
+
+  int step = 0;
+  // Loop on the read itself: testing eof() before reading would process one
+  // extra, empty line after the last real one.
+  while (getline(inStrme, inLine)) {
+    switch (step) {
+    case 0:
+      // sentence id: parsed but not used
+      Scan<int>(inLine);
+      ++step;
+      break;
+    case 1:
+      // score: parsed but not used
+      Scan<float>(inLine);
+      ++step;
+      break;
+    case 2:
+      source = inLine;
+      ++step;
+      break;
+    case 3:
+      if (!haveInput) {
+        input = inLine;
+        haveInput = true;
+      } else {
+        assert(inLine == input);
+      }
+      ++step;
+      break;
+    case 4:
+      target = inLine;
+      ++step;
+      break;
+    case 5:
+      align = inLine;
+      ++step;
+      break;
+    case 6:
+      // 'X' marks the end of the path so that a trailing mismatch is closed
+      path = inLine + "X";
+      ++step;
+      break;
+    case 7: {
+      const int count = Scan<int>(inLine);
+      const CreateXMLRetValues ret = createXML(ruleCount, source, input, target, align, path);
+
+      rule << ret.ruleS << " [X] ||| " << ret.ruleT << " [X] ||| " << ret.ruleAlignment
+           << " ||| " << count << endl;
+      ruleInv << ret.ruleT << " [X] ||| " << ret.ruleS << " [X] ||| " << ret.ruleAlignmentInv
+              << " ||| " << count << endl;
+
+      ++ruleCount;
+      step = 0;
+      break;
+    }
+    }
+  }
+
+  ruleInv.close();
+  rule.close();
+  inStrme.close();
+
+}
+
+
+/**
+ * Builds a hierarchical rule and an XML frame from one fuzzy match.
+ *
+ * @param ruleCount  not used by the function; kept for the existing callers
+ * @param source     TM source sentence, space separated
+ * @param input      input sentence, space separated
+ * @param target     TM target sentence, space separated
+ * @param align      source-target word alignment, parsed by Alignments
+ * @param path       edit path, one character per step. 'M' and 'X' count as
+ *                   matching steps ('X' is the end marker appended by
+ *                   create_xml), everything else as a mismatch; 'I' does not
+ *                   advance the source position and 'D' does not advance the
+ *                   input position.
+ * @return ruleS / ruleT with "[X][X]" standing for each mismatched span,
+ *         ruleAlignment and its inverse, and the frame (also echoed to
+ *         stderr).
+ *
+ * Differences to the previous version:
+ *  - insertions recorded for position -1 now start the frame (the test was
+ *    inverted, so they were dropped);
+ *  - a TM target span now ends at a word that is *not* included, as the
+ *    comment on that branch always said (the negation was missing);
+ *  - the inverted alignment points are separated by spaces.
+ */
+CreateXMLRetValues createXML(int ruleCount, const string &source, const string &input, const string &target, const string &align, const string &path)
+{
+  CreateXMLRetValues ret;
+  const vector<string> sourceToks  = Tokenize(source, " ");
+  const vector<string> inputToks   = Tokenize(input, " ");
+  const vector<string> targetsToks = Tokenize(target, " ");
+  Alignments alignments(align, sourceToks.size(), targetsToks.size());
+  map<int, string> frameInput;          // target position -> input words inserted after it
+  map<int, int> alignI2S;               // input position -> source position
+  vector< map<string, int> > nonTerms;  // one entry per inserted span
+  vector<bool> targetBitmap(targetsToks.size(), true); // target words kept in the rule
+  vector<bool> inputBitmap;             // input words that matched
+
+  // placeholder meaning "no aligned target position found yet"
+  const int NO_TARGET = 1000;
+
+  // STEP 1: FIND MISMATCHES
+  int s = 0, i = 0;
+  bool currently_matching = false;
+  int start_s = 0, start_i = 0;
+
+  for ( int p = 0 ; p < int(path.length()) ; p++ ) {
+    const char action = path[p];
+    const bool matchingStep = (action == 'M' || action == 'X');
+
+    if ( currently_matching && !matchingStep ) {
+      // beginning of a mismatch
+      start_i            = i;
+      start_s            = s;
+      currently_matching = false;
+    } else if ( !currently_matching && matchingStep ) {
+      // end of a mismatch
+
+      // remove use of affected target words
+      for ( int ss = start_s ; ss < s ; ss++ ) {
+        const std::map<int, int> &targets = alignments.m_alignS2T[ss];
+
+        std::map<int, int>::const_iterator iter;
+        for (iter = targets.begin(); iter != targets.end(); ++iter) {
+          targetBitmap[iter->first] = false;
+        }
+        // also remove enclosed unaligned words?
+      }
+
+      // are there input words that need to be inserted ?
+      if (start_i < i ) {
+
+        // take note of input words to be inserted
+        string insertion;
+        for (int ii = start_i ; ii < i ; ii++ ) {
+          insertion += inputToks[ii] + " ";
+        }
+
+        // find position for inserted input words
+
+        // find first removed target word
+        int start_t = NO_TARGET;
+        for ( int ss = start_s ; ss < s ; ss++ ) {
+          const std::map<int, int> &targets = alignments.m_alignS2T[ss];
+
+          std::map<int, int>::const_iterator iter;
+          for (iter = targets.begin(); iter != targets.end(); ++iter) {
+            if (iter->first < start_t) {
+              start_t = iter->first;
+            }
+          }
+        }
+
+        // end of sentence? add to end
+        if ( start_t == NO_TARGET && i > int(inputToks.size()) - 1 ) {
+          start_t = targetsToks.size() - 1;
+        }
+
+        // backtrack to previous words if unaligned
+        if ( start_t == NO_TARGET ) {
+          start_t = -1;
+          for ( int ss = s - 1 ; start_t == -1 && ss >= 0 ; ss-- ) {
+            const std::map<int, int> &targets = alignments.m_alignS2T[ss];
+
+            std::map<int, int>::const_iterator iter;
+            for (iter = targets.begin(); iter != targets.end(); ++iter) {
+              if (iter->first > start_t) {
+                start_t = iter->first;
+              }
+            }
+          }
+        }
+
+        frameInput[start_t] += insertion;
+        map<string, int> nt;
+        nt["start_t"] = start_t;
+        nt["start_i"] = start_i;
+        nonTerms.push_back(nt);
+
+      } // if (start_i < i )
+
+      currently_matching = true;
+    }
+
+    // advance the source / input positions for this step
+    if (action != 'I')
+      s++;
+    if (action != 'D') {
+      i++;
+      alignI2S[i] = s;
+    }
+
+    if (action == 'M') {
+      inputBitmap.push_back(true);
+    } else if (action == 'I' || action == 'S') {
+      inputBitmap.push_back(false);
+    }
+
+  } // for ( int p = 0
+
+  // STEP 2: BUILD RULE AND FRAME
+
+  // hierarchical rule, source side
+  int rule_pos_s = 0;
+  map<int, int> ruleAlignS; // source position -> position in ruleS
+
+  for (int ii = 0 ; ii < int(inputBitmap.size()) ; ++ii ) {
+    if ( inputBitmap[ii] ) {
+      ret.ruleS += inputToks[ii] + " ";
+      ruleAlignS[ alignI2S[ii] ] = rule_pos_s++;
+    }
+
+    for (size_t j = 0; j < nonTerms.size(); ++j) {
+      map<string, int> &nt = nonTerms[j];
+      if (ii == nt["start_i"]) {
+        ret.ruleS += "[X][X] ";
+        nt["rule_pos_s"] = rule_pos_s++;
+      }
+    }
+  }
+
+  // hierarchical rule, target side; t == -1 stands for "before the first word"
+  int rule_pos_t = 0;
+  map<int, int> ruleAlignT; // target position -> position in ruleT
+
+  for (int t = -1 ; t < (int) targetBitmap.size(); t++ ) {
+    if (t >= 0 && targetBitmap[t]) {
+      ret.ruleT += targetsToks[t] + " ";
+      ruleAlignT[t] = rule_pos_t++;
+    }
+
+    for (size_t j = 0; j < nonTerms.size(); ++j) {
+      map<string, int> &nt = nonTerms[j];
+
+      if (t == nt["start_t"]) {
+        ret.ruleT += "[X][X] ";
+        nt["rule_pos_t"] = rule_pos_t++;
+      }
+    }
+  }
+
+  // word alignment between the terminals of both rule sides
+  ret.ruleAlignment = "";
+
+  for (map<int, int>::const_iterator sIter = ruleAlignS.begin(); sIter != ruleAlignS.end(); ++sIter) {
+    const int srcPos = sIter->first;
+
+    if (srcPos < int(alignments.m_alignS2T.size())) {
+      const std::map<int, int> &targets = alignments.m_alignS2T[srcPos];
+
+      std::map<int, int>::const_iterator tIter;
+      for (tIter = targets.begin(); tIter != targets.end(); ++tIter) {
+        map<int, int>::const_iterator ruleT = ruleAlignT.find(tIter->first);
+        if (ruleT == ruleAlignT.end())
+          continue; // target word is not part of the rule
+        ret.ruleAlignment += SPrint(sIter->second) + "-" + SPrint(ruleT->second) + " ";
+      }
+    }
+  }
+
+  // alignment between the non-terminals
+  for (size_t j = 0; j < nonTerms.size(); ++j) {
+    map<string, int> &nt = nonTerms[j];
+    ret.ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " ";
+  }
+
+  ret.ruleS = TrimInternal(ret.ruleS);
+  ret.ruleT = TrimInternal(ret.ruleT);
+  ret.ruleAlignment = TrimInternal(ret.ruleAlignment);
+
+  // inverse alignment: swap both sides of every point, keep them separated
+  const vector<string> ruleAlignmentToks = Tokenize(ret.ruleAlignment);
+  for (size_t j = 0; j < ruleAlignmentToks.size(); ++j) {
+    const vector<string> toks = Tokenize(ruleAlignmentToks[j], "-");
+    assert(toks.size() == 2);
+    ret.ruleAlignmentInv += toks[1] + "-" + toks[0] + " ";
+  }
+  ret.ruleAlignmentInv = TrimInternal(ret.ruleAlignmentInv);
+
+  // frame: starts with the words inserted in front of the first target word
+  const map<int, string>::const_iterator leading = frameInput.find(-1);
+  if (leading != frameInput.end())
+    ret.frame = leading->second;
+
+  bool currently_included = false;
+  int start_t             = -1;
+  targetBitmap.push_back(false); // extra entry marks the end of the sentence
+
+  for (size_t t = 0 ; t <= targetsToks.size() ; t++ ) {
+    const map<int, string>::const_iterator inserted = frameInput.find(t);
+    const bool hasInsertion = (inserted != frameInput.end());
+
+    // beginning of tm target inclusion
+    if ( !currently_included && targetBitmap[t] ) {
+      start_t            = t;
+      currently_included = true;
+    }
+    // end of tm target inclusion (not included word or inserted input)
+    else if (currently_included
+             && ( !targetBitmap[t] || hasInsertion )
+            ) {
+      // add xml (unless change is at the beginning of the sentence
+      if ( start_t >= 0 ) {
+        string tmTarget;
+        // word t itself still belongs to the span when it is included
+        for (size_t tt = start_t ; tt < t + targetBitmap[t] ; tt++ ) {
+          tmTarget += targetsToks[tt] + " ";
+        }
+        // target = Trim(target); TODO
+        ret.frame += "<xml translation=\"" + tmTarget + "\"> x </xml> ";
+      }
+      currently_included = false;
+    }
+
+    if (hasInsertion)
+      ret.frame += inserted->second;
+
+  } //for (size_t t = 0
+
+  cerr << ret.frame << "\n-------------------------------------\n";
+  return ret;
+
+}
+
+
+
diff --git a/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.h b/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.h
new file mode 100644
index 0000000000000000000000000000000000000000..3a49a1fc09b46664c3da127a7c60772824b1644b
--- /dev/null
+++ b/mosesdecoder/moses/TranslationModel/fuzzy-match/create_xml.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include <string>
+
+void create_xml(const std::string &inPath);
diff --git a/mosesdecoder/moses/server/Hypothesis_4server.cpp b/mosesdecoder/moses/server/Hypothesis_4server.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ace9c9672bfa69c9c5dcc2cdfd504ab6bbc6724
--- /dev/null
+++ b/mosesdecoder/moses/server/Hypothesis_4server.cpp
@@ -0,0 +1,37 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+#include "moses/Hypothesis.h"
+#include "moses/Manager.h"
+#include <boost/foreach.hpp>
+namespace Moses {
+  // Appends the word alignment of this hypothesis' current phrase pair to
+  // `dest`: one XML-RPC struct per alignment point with the keys
+  // "source-word" and "target-word". Positions are offset by the start of
+  // the current source / target range; the points come in the order given
+  // by the configured WA_SortOrder.
+  void
+  Hypothesis::
+  OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest) const
+  {
+    using namespace std;
+    typedef pair<size_t,size_t> item;
+
+    Range const& src = this->GetCurrSourceWordsRange();
+    Range const& trg = this->GetCurrTargetWordsRange();
+
+    WordAlignmentSort waso = m_manager.options()->output.WA_SortOrder;
+    vector<item const* > points
+      = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso);
+
+    for (size_t k = 0; k < points.size(); ++k) {
+      item const* p = points[k];
+      map<string, xmlrpc_c::value> entry;
+      entry["source-word"] = xmlrpc_c::value_int(src.GetStartPos() + p->first);
+      entry["target-word"] = xmlrpc_c::value_int(trg.GetStartPos() + p->second);
+      dest.push_back(xmlrpc_c::value_struct(entry));
+    }
+  }
+
+  // Writes the word alignment of the whole hypothesis chain to `out`.
+  // The chain is collected by following GetPrevHypo() from this hypothesis
+  // and then emitted in reverse, i.e. starting with the hypothesis that has
+  // no predecessor.
+  void
+  Hypothesis::
+  OutputWordAlignment(std::vector<xmlrpc_c::value>& out) const
+  {
+    typedef std::vector<Hypothesis const*> chain_t;
+
+    chain_t chain;
+    Hypothesis const* h = this;
+    while (h) {
+      chain.push_back(h);
+      h = h->GetPrevHypo();
+    }
+
+    for (chain_t::const_reverse_iterator r = chain.rbegin(); r != chain.rend(); ++r)
+      (*r)->OutputLocalWordAlignment(out);
+  }
+
+}
diff --git a/mosesdecoder/moses/server/Optimizer.h b/mosesdecoder/moses/server/Optimizer.h
new file mode 100644
index 0000000000000000000000000000000000000000..da84df0238b30dc9ff9eb320f5cf6e541de9850f
--- /dev/null
+++ b/mosesdecoder/moses/server/Optimizer.h
@@ -0,0 +1,17 @@
+// -*- c++ -*-
+
+#include <xmlrpc-c/base.hpp>
+#include <xmlrpc-c/registry.hpp>
+#include <xmlrpc-c/server_abyss.hpp>
+
+namespace MosesServer
+{
+// XML-RPC method object for the server (derives from xmlrpc_c::method).
+// NOTE(review): this header has neither an include guard nor #pragma once.
+class Optimizer : public xmlrpc_c::method
+{
+public:
+  Optimizer();
+
+  // Handles one call: `paramList` carries the call arguments, the result is
+  // returned through `retvalP`. Presumably the override of
+  // xmlrpc_c::method::execute - confirm against the xmlrpc-c headers.
+  void execute(xmlrpc_c::paramList const& paramList,
+               xmlrpc_c::value* const retvalP);
+};
+}
diff --git a/mosesdecoder/moses/server/PackScores.cpp b/mosesdecoder/moses/server/PackScores.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ec6109d3eeb38fc665e94f427d824da7495de0c
--- /dev/null
+++ b/mosesdecoder/moses/server/PackScores.cpp
@@ -0,0 +1,45 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+#include "PackScores.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+#include <boost/foreach.hpp>
+namespace Moses {
+
+// Packs the scores of feature function `ff` into `M`.
+// The entry is stored under ff's score producer description and is an array
+// of two elements:
+//   [0] array with the dense scores of the tuneable components of `ff`
+//   [1] struct with every sparse (named) score found in `S`
+void
+PackScores(FeatureFunction const& ff, FVector const& S,
+	   std::map<std::string, xmlrpc_c::value>& M)
+{
+  size_t const numComponents = ff.GetNumScoreComponents();
+  size_t const offset = ff.GetIndex();
+
+  // dense part: only components that are tuneable
+  std::vector<xmlrpc_c::value> dense;
+  dense.reserve(numComponents);
+  for (size_t i = 0; i < numComponents; ++i) {
+    if (!ff.IsTuneableComponent(i)) continue;
+    dense.push_back(xmlrpc_c::value_double(S[offset+i]));
+  }
+
+  // sparse part: name -> value
+  std::map<std::string,xmlrpc_c::value> sparse;
+  typedef FVector::FNVmap::const_iterator iter;
+  for (iter m = S.cbegin(); m != S.cend(); ++m) {
+    sparse[m->first.name()] = xmlrpc_c::value_double(m->second);
+  }
+
+  std::vector<xmlrpc_c::value> packed;
+  packed.push_back(xmlrpc_c::value_array(dense));
+  packed.push_back(xmlrpc_c::value_struct(sparse));
+  M[ff.GetScoreProducerDescription()] = xmlrpc_c::value_array(packed);
+}
+
+// Packs the scores in `S` for every tuneable feature function - first the
+// stateful, then the stateless ones - into one XML-RPC struct keyed by the
+// score producer description of each feature function.
+xmlrpc_c::value
+PackScores(ScoreComponentCollection const& S)
+{
+  typedef StatefulFeatureFunction SFFF;
+  typedef StatelessFeatureFunction SLFF;
+
+  std::map<std::string, xmlrpc_c::value> packed;
+
+  BOOST_FOREACH(SFFF const* ff, SFFF::GetStatefulFeatureFunctions()) {
+    if (!ff->IsTuneable()) continue;
+    PackScores(*ff, S.GetScoresVector(), packed);
+  }
+
+  BOOST_FOREACH(SLFF const* ff, SLFF::GetStatelessFeatureFunctions()) {
+    if (!ff->IsTuneable()) continue;
+    PackScores(*ff, S.GetScoresVector(), packed);
+  }
+
+  return xmlrpc_c::value_struct(packed);
+}
+}
diff --git a/mosesdecoder/moses/server/PackScores.h b/mosesdecoder/moses/server/PackScores.h
new file mode 100644
index 0000000000000000000000000000000000000000..5d875bc3bacc940181f4b98afe30db24f08b2bb0
--- /dev/null
+++ b/mosesdecoder/moses/server/PackScores.h
@@ -0,0 +1,10 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+#pragma once
+#include <xmlrpc-c/base.hpp>
+#include "moses/FF/FeatureFunction.h"
+#include "moses/ScoreComponentCollection.h"
+
+namespace Moses {
+  xmlrpc_c::value 
+  PackScores(ScoreComponentCollection const& S);
+}
diff --git a/mosesdecoder/moses/server/Server.h b/mosesdecoder/moses/server/Server.h
new file mode 100644
index 0000000000000000000000000000000000000000..802eaef3e4f4c0846dc5d27103c062a5fc08888a
--- /dev/null
+++ b/mosesdecoder/moses/server/Server.h
@@ -0,0 +1,46 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include "moses/TypeDef.h"
+
+#ifdef WITH_THREADS
+#include <boost/thread.hpp>
+#include "moses/ThreadPool.h"
+#endif
+
+#include <xmlrpc-c/base.hpp>
+#include <xmlrpc-c/registry.hpp>
+#include <xmlrpc-c/server_abyss.hpp>
+#include "Translator.h"
+#include "Optimizer.h"
+#include "Updater.h"
+#include "CloseSession.h"
+#include "Session.h"
+#include "moses/parameters/ServerOptions.h"
+#include <string>
+
+namespace MosesServer
+{
+  // XML-RPC server front end: holds the server options, the session cache,
+  // the method registry and one method object per call (updater, optimizer,
+  // translator, close_session).
+  // The members are initialised in the order declared here, so keep the
+  // order in sync with the constructor's initialiser list.
+  class Server
+  {
+    Moses::ServerOptions m_server_options;
+    SessionCache m_session_cache;
+    xmlrpc_c::registry m_registry;
+    xmlrpc_c::methodPtr const m_updater;
+    xmlrpc_c::methodPtr const m_optimizer;
+    xmlrpc_c::methodPtr const m_translator;
+    xmlrpc_c::methodPtr const m_close_session;
+    std::string m_pidfile;
+
+  public:
+    Server(Moses::Parameter& params);
+    ~Server();
+
+    int run();
+
+    void delete_session(uint64_t const session_id);
+
+    Moses::ServerOptions const& options() const;
+
+    Session const& get_session(uint64_t session_id);
+
+  };
+}
diff --git a/mosesdecoder/moses/server/Session.h b/mosesdecoder/moses/server/Session.h
new file mode 100644
index 0000000000000000000000000000000000000000..27d5ca8455fe858ff03f0da87d4b339b7204bf0d
--- /dev/null
+++ b/mosesdecoder/moses/server/Session.h
@@ -0,0 +1,75 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
+#pragma once
+#include "moses/Util.h"
+#include "moses/ContextScope.h"
+#include "moses/parameters/AllOptions.h"
+#include <sys/time.h>
+#include <boost/unordered_map.hpp>
+
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/thread/locks.hpp>
+#endif
+namespace MosesServer{
+  
+  /// Per-client state kept between requests that carry the same session id.
+  struct Session
+  {
+    uint64_t const id;     // fixed at construction
+    time_t start_time;     // creation time
+    time_t last_access;    // equals start_time until something updates it
+    boost::shared_ptr<Moses::ContextScope> const scope; // stores local info
+    SPTR<std::map<std::string,float> > m_context_weights;
+
+    /// Creates a session with a fresh ContextScope; both timestamps are "now".
+    Session(uint64_t const session_id)
+      : id(session_id)
+      , start_time(time(NULL))
+      , last_access(start_time)
+      , scope(new Moses::ContextScope)
+    {}
+
+    bool is_new() const { return start_time == last_access; }
+    void setup(std::map<std::string, xmlrpc_c::value> const& params);
+  };
+
+  /// Map from session id to Session, guarded by a shared mutex. Ids come from a
+  /// counter that starts at 1, so the first id actually issued is 2.
+  class SessionCache
+  {
+    mutable boost::shared_mutex m_lock;
+    uint64_t m_session_counter;
+    boost::unordered_map<uint64_t,Session> m_cache;
+  public:
+    SessionCache() : m_session_counter(1) {}
+
+    /// Returns the session cached under id (refreshing last_access); for id 0/1
+    /// or an unknown id, creates a session under a newly issued id instead.
+    /// id is 64 bits wide, like the stored keys, so large ids are not truncated.
+    /// NOTE(review): the reference outlives the lock; confirm no concurrent erase().
+    Session const& operator[](uint64_t id)
+    {
+      boost::upgrade_lock<boost::shared_mutex> lock(m_lock);
+      if (id > 1) {
+        boost::unordered_map<uint64_t, Session>::iterator hit = m_cache.find(id);
+        if (hit != m_cache.end()) {
+          hit->second.last_access = time(NULL);
+          return hit->second;
+        }
+      }
+      // Miss: take exclusive access before touching the counter and the map.
+      boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
+      id = ++m_session_counter;
+      return m_cache.insert(std::make_pair(id, Session(id))).first->second;
+    }
+
+    /// Removes the session stored under id, if there is one.
+    void erase(uint64_t const id)
+    {
+      boost::unique_lock<boost::shared_mutex> lock(m_lock);
+      m_cache.erase(id);
+    }
+  };
+
+
+}
diff --git a/mosesdecoder/moses/server/TranslationRequest.cpp b/mosesdecoder/moses/server/TranslationRequest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d2118ad8f79c5908236483816a2b65773f5d2207
--- /dev/null
+++ b/mosesdecoder/moses/server/TranslationRequest.cpp
@@ -0,0 +1,524 @@
+#include "TranslationRequest.h"
+#include "PackScores.h"
+#include "moses/ContextScope.h"
+#include <boost/foreach.hpp>
+#include "moses/Util.h"
+#include "moses/Hypothesis.h"
+
+namespace MosesServer
+{
+using namespace std;
+using Moses::Hypothesis;
+using Moses::StaticData;
+using Moses::Range;
+using Moses::ChartHypothesis;
+using Moses::Phrase;
+using Moses::Manager;
+using Moses::SearchGraphNode;
+using Moses::TrellisPathList;
+using Moses::TranslationOptionCollection;
+using Moses::TranslationOptionList;
+using Moses::TranslationOption;
+using Moses::TargetPhrase;
+using Moses::FValue;
+using Moses::PhraseDictionaryMultiModel;
+using Moses::FindPhraseDictionary;
+using Moses::Sentence;
+using Moses::TokenizeMultiCharSeparator;
+using Moses::FeatureFunction;
+using Moses::Scan;
+
+/// Factory: builds a request that records itself in m_self and its Translator.
+boost::shared_ptr<TranslationRequest>
+TranslationRequest::create(Translator* translator,
+                           xmlrpc_c::paramList const& paramList,
+                           boost::condition_variable& cond, boost::mutex& mut)
+{
+  boost::shared_ptr<TranslationRequest> request(new TranslationRequest(paramList, cond, mut));
+  request->m_self = request;
+  request->m_translator = translator;
+  return request;
+}
+
+/// Converts the XML-RPC struct w (name -> double) into a float map and hands it to s.
+void SetContextWeights(Moses::ContextScope& s, xmlrpc_c::value const& w)
+{
+  typedef std::map<std::string,xmlrpc_c::value> xmlrpc_map_t;
+  xmlrpc_map_t const entries = static_cast<xmlrpc_map_t>(xmlrpc_c::value_struct(w));
+  SPTR<std::map<std::string,float> > weights(new std::map<std::string, float>);
+  BOOST_FOREACH(xmlrpc_map_t::value_type const& entry, entries)
+    (*weights)[entry.first] = xmlrpc_c::value_double(entry.second);
+  s.SetContextWeights(weights);
+}
+  
+/// Handles the request: parses the parameters, runs the chart or the phrase
+/// decoder, then sets m_done and notifies one waiter on m_cond.
+void
+TranslationRequest::
+Run()
+{
+  typedef std::map<std::string,xmlrpc_c::value> param_t;
+  param_t const& params = m_paramList.getStruct(0);
+  parse_request(params);
+  // cerr << "SESSION ID" << ret->m_session_id << endl;
+
+
+  // settings within the session scope
+  param_t::const_iterator si = params.find("context-weights");
+  if (si != params.end()) SetContextWeights(*m_scope, si->second);
+
+  if (is_syntax(m_options->search.algo))
+    run_chart_decoder();
+  else
+    run_phrase_decoder();
+
+  // Set the flag while holding the mutex, then notify after releasing it.
+  {
+    boost::lock_guard<boost::mutex> lock(m_mutex);
+    m_done = true;
+  }
+  m_cond.notify_one();
+}
+
+/// Appends h's source/target word ranges to aInfo (no-op unless alignment info is on).
+void
+TranslationRequest::
+add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
+{
+  //  if (!options()->output.ReportSegmentation) return;
+  if (m_withAlignInfo) {
+    Range const& targetRange = h.GetCurrTargetWordsRange();
+    Range const& sourceRange = h.GetCurrSourceWordsRange();
+    std::map<std::string, xmlrpc_c::value> span;
+    span["tgt-start"] = xmlrpc_c::value_int(targetRange.GetStartPos());
+    span["tgt-end"]   = xmlrpc_c::value_int(targetRange.GetEndPos());
+    span["src-start"] = xmlrpc_c::value_int(sourceRange.GetStartPos());
+    span["src-end"]   = xmlrpc_c::value_int(sourceRange.GetEndPos());
+    aInfo.push_back(xmlrpc_c::value_struct(span));
+  }
+}
+
+/// Writes factor 0 of every word of hypo's output phrase to out, each followed
+/// by a space, after dropping the first and the last word of the phrase.
+void
+TranslationRequest::
+outputChartHypo(ostream& out, const ChartHypothesis* hypo)
+{
+  Phrase words(20);
+  hypo->GetOutputPhrase(words);
+  assert(words.GetSize() >= 2); // there must be a first and a last word to drop
+  words.RemoveWord(0);
+  words.RemoveWord(words.GetSize() - 1);
+  size_t const count = words.GetSize();
+  for (size_t i = 0; i != count; ++i) out << *words.GetFactor(i, 0) << " ";
+}
+
+/// Strict ordering of search graph nodes by ascending hypothesis id.
+bool
+TranslationRequest::compareSearchGraphNode(const Moses::SearchGraphNode& a,
+                                           const Moses::SearchGraphNode& b)
+{
+  return b.hypo->GetId() > a.hypo->GetId();
+}
+
+/// Packs every search graph node of manager (sorted) into the array retData["sg"].
+void
+TranslationRequest::
+insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
+{
+  using xmlrpc_c::value_int;    using xmlrpc_c::value_double;
+  using xmlrpc_c::value_struct; using xmlrpc_c::value_string;
+  vector<SearchGraphNode> nodes;
+  manager.GetSearchGraph(nodes);
+  std::sort(nodes.begin(), nodes.end());
+  vector<xmlrpc_c::value> packed;
+  for (size_t i = 0; i < nodes.size(); ++i) {
+    Moses::SearchGraphNode const& node = nodes[i];
+    Hypothesis const* const hypo = node.hypo;
+    map<string, xmlrpc_c::value> entry; // search graph xml node
+    entry["forward"] = value_double(node.forward);
+    entry["fscore"] = value_double(node.fscore);
+    entry["hyp"] = value_int(hypo->GetId());
+    entry["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
+    if (hypo->GetId() != 0) { // hypothesis 0 gets no back-pointer data
+      Hypothesis const* const prev = hypo->GetPrevHypo();
+      entry["back"] = value_int(prev->GetId());
+      entry["score"] = value_double(hypo->GetScore());
+      entry["transition"] = value_double(hypo->GetScore() - prev->GetScore());
+      if (node.recombinationHypo)
+        entry["recombined"] = value_int(node.recombinationHypo->GetId());
+      entry["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
+      entry["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
+      entry["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(options()->output.factor_order));
+    }
+    packed.push_back(value_struct(entry));
+  }
+  retData["sg"] = xmlrpc_c::value_array(packed);
+}
+
+/// Computes the n-best list, prints it to cout via manager.OutputNBest, and
+/// stores one struct per non-empty path in the array retData["nbest"].
+void
+TranslationRequest::
+outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
+{
+  Moses::NBestOptions const& nbo = m_options->nbest;
+  TrellisPathList paths;
+  manager.CalcNBest(nbo.nbest_size, paths, nbo.only_distinct);
+  manager.OutputNBest(cout, paths);
+
+  vector<xmlrpc_c::value> items;
+  BOOST_FOREACH(Moses::TrellisPath const* path, paths) {
+    vector<const Hypothesis *> const& edges = path->GetEdges();
+    if (edges.empty()) continue;
+    std::map<std::string, xmlrpc_c::value> item;
+    pack_hypothesis(manager, edges, "hyp", item);
+    if (m_withScoreBreakdown) {
+      // should the score breakdown be reported in a more structured manner?
+      ostringstream fvals;
+      path->GetScoreBreakdown()->OutputAllFeatureScores(fvals, nbo.include_feature_labels);
+      item["fvals"] = xmlrpc_c::value_string(fvals.str());
+      item["scores"] = PackScores(*path->GetScoreBreakdown());
+    }
+
+    // weighted score
+    item["totalScore"] = xmlrpc_c::value_double(path->GetFutureScore());
+    items.push_back(xmlrpc_c::value_struct(item));
+  }
+  retData["nbest"] = xmlrpc_c::value_array(items);
+}
+
+/// Stores under retData["topt"] one struct per translation option: target phrase,
+/// future score, span indices, core feature values and the packed labelled scores.
+void
+TranslationRequest::
+insertTranslationOptions(Moses::Manager& manager,
+                         std::map<std::string, xmlrpc_c::value>& retData)
+{
+  std::vector<Moses::FactorType> const& ofactor_order = options()->output.factor_order;
+  const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions();
+  vector<xmlrpc_c::value> toptsXml;
+  size_t const stop = toptsColl->GetSource().GetSize();
+  TranslationOptionList const* tol;
+  for (size_t s = 0 ; s < stop ; ++s) {
+    for (size_t e=s;(tol=toptsColl->GetTranslationOptionList(s,e))!=NULL;++e) {
+      BOOST_FOREACH(TranslationOption const* topt, *tol) {
+        std::map<std::string, xmlrpc_c::value> toptXml;
+        TargetPhrase const& tp = topt->GetTargetPhrase();
+        std::string tphrase = tp.GetStringRep(ofactor_order);
+        toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
+        toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
+        toptXml["start"]  = xmlrpc_c::value_int(s);
+        toptXml["end"]    = xmlrpc_c::value_int(e);
+        vector<xmlrpc_c::value> scoresXml;
+        const std::valarray<FValue> &scores
+          = topt->GetScoreBreakdown().getCoreFeatures();
+        for (size_t j = 0; j < scores.size(); ++j)
+          scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
+        toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
+        // The labelled scores come from PackScores; no text rendering is needed.
+        toptXml["labelledScores"] = PackScores(topt->GetScoreBreakdown());
+        toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
+      }
+    }
+  }
+  retData["topt"] = xmlrpc_c::value_array(toptsXml);
+}
+
+/// Stores the request parameters plus the condition variable and mutex passed
+/// in by the caller; the request starts out not done and with session id 0.
+TranslationRequest::TranslationRequest(xmlrpc_c::paramList const& paramList,
+                                       boost::condition_variable& cond,
+                                       boost::mutex& mut)
+  : m_cond(cond), m_mutex(mut), m_done(false)
+  , m_paramList(paramList), m_session_id(0)
+{}
+
+/// Reads the flag stored under key: false if absent, the boolean itself if the
+/// value is an XML-RPC boolean, else true only for "true", "True", "TRUE" or "1".
+bool
+check(std::map<std::string, xmlrpc_c::value> const& param,
+      std::string const key)
+{
+  typedef std::map<std::string, xmlrpc_c::value>::const_iterator iter_t;
+  iter_t const entry = param.find(key);
+  if (entry == param.end()) return false;
+  xmlrpc_c::value const& raw = entry->second;
+  if (raw.type() == xmlrpc_c::value::TYPE_BOOLEAN) return xmlrpc_c::value_boolean(raw);
+  std::string const text = string(xmlrpc_c::value_string(raw));
+  return text == "true" || text == "True" || text == "TRUE" || text == "1";
+}
+
+/// Reads the XML-RPC parameters of this request and sets up what decoding
+/// needs: session and scope, per-request options, source text, reporting flags,
+/// multi-model weights, context information and feature weights.  A missing
+/// "text" raises xmlrpc_c::fault; an unusable "model_name", "context-scope"
+/// record or "weights" entry is logged and skipped.
+void
+TranslationRequest::
+parse_request(std::map<std::string, xmlrpc_c::value> const& params)
+{
+  // parse XMLRPC request
+  m_paramList.verifyEnd(1); // ??? UG
+
+  typedef std::map<std::string, xmlrpc_c::value> params_t;
+  params_t::const_iterator si;
+
+  si = params.find("session-id");
+  if (si != params.end())
+    {
+      m_session_id = xmlrpc_c::value_int(si->second);
+      Session const& S = m_translator->get_session(m_session_id);
+      m_scope = S.scope;
+      m_session_id = S.id;
+    }
+  else
+    {
+      m_session_id = 0;
+      m_scope.reset(new Moses::ContextScope);
+    }
+
+  boost::shared_ptr<Moses::AllOptions> opts(new Moses::AllOptions(*StaticData::Instance().options()));
+  opts->update(params);
+
+  m_withGraphInfo = check(params, "sg");
+  if (m_withGraphInfo || opts->nbest.nbest_size > 0) {
+    opts->output.SearchGraph = "true";
+    opts->nbest.enabled = true;
+  }
+
+  m_options = opts;
+
+  // source text must be given, or we don't know what to translate
+  si = params.find("text");
+  if (si == params.end())
+    throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
+  m_source_string = xmlrpc_c::value_string(si->second);
+  XVERBOSE(1,"Input: " << m_source_string << endl);
+
+  m_withTopts           = check(params, "topt");
+  m_withScoreBreakdown  = check(params, "add-score-breakdown");
+  si = params.find("lambda");
+  if (si != params.end())
+    {
+      // muMo = multiModel
+      xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
+      vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
+      vector<float> w(muMoValVec.size());
+      for (size_t i = 0; i < muMoValVec.size(); ++i)
+        w[i] = xmlrpc_c::value_double(muMoValVec[i]);
+      if (w.size() && (si = params.find("model_name")) != params.end())
+        {
+          string const model_name = xmlrpc_c::value_string(si->second);
+          // The name comes from the client: only use the lookup result if it
+          // really is a multi-model dictionary (dynamic_cast gives NULL otherwise).
+          PhraseDictionaryMultiModel* pdmm
+            = dynamic_cast<PhraseDictionaryMultiModel*>(FindPhraseDictionary(model_name));
+          if (pdmm)
+            pdmm->SetTemporaryMultiModelWeightsVector(w);
+          else
+            TRACE_ERR("ERROR: No multi-model phrase dictionary with name " << model_name << ", lambda ignored\n");
+        }
+    }
+
+  si = params.find("context");
+  if (si != params.end())
+    {
+      string context = xmlrpc_c::value_string(si->second);
+      VERBOSE(1,"CONTEXT " << context);
+      m_context.reset(new std::vector<std::string>(1,context));
+    }
+
+  si = params.find("context-scope");
+  if (si != params.end())
+    {
+      string context = xmlrpc_c::value_string(si->second);
+
+      string groupSeparator("Moses::ContextScope::GroupSeparator");
+      string recordSeparator("Moses::ContextScope::RecordSeparator");
+
+      // Here, we assume that any XML-RPC value
+      //       associated with the key "context-scope"
+      //       has the following format:
+      //
+      // FeatureFunctionName followed by recordSeparator
+      //                     followed by the value of interest
+      //                     followed by groupSeparator
+      //
+      // In the following code, the value of interest will be stored
+      //        in contextScope under the key FeatureFunctionName,
+      //        where FeatureFunctionName is the actual name of the feature function
+
+      boost::shared_ptr<Moses::ContextScope> contextScope = GetScope();
+
+      BOOST_FOREACH(string group, TokenizeMultiCharSeparator(context, groupSeparator)) {
+        vector<string> record = TokenizeMultiCharSeparator(group, recordSeparator);
+        if (record.size() < 2) {
+          // No name/value pair to store; an empty group (e.g. after a trailing separator) is skipped silently.
+          if (!group.empty())
+            TRACE_ERR("WARNING: XML-RPC context-scope record was improperly formatted:\t" << group << "\n");
+          continue;
+        }
+
+        // Use the feature function whose name is record[0] as a key
+        FeatureFunction& ff = Moses::FeatureFunction::FindFeatureFunction(record[0]);
+        void const* key = static_cast<void const*>(&ff);
+
+        // Store (in the context scope) record[1] as the value associated with that key
+        boost::shared_ptr<string> value = contextScope->get<string>(key,true);
+        value->replace(value->begin(), value->end(), record[1]);
+      }
+    }
+
+  // Report alignment info if Moses config says to or if XML request says to
+  m_withAlignInfo = options()->output.ReportSegmentation || check(params, "align");
+
+  // Report word alignment info if Moses config says to or if XML request says to
+  m_withWordAlignInfo = options()->output.PrintAlignmentInfo || check(params, "word-align");
+
+  si = params.find("weights");
+  if (si != params.end())
+    {
+      boost::unordered_map<string, FeatureFunction*> map;
+      {
+        const vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
+        BOOST_FOREACH(FeatureFunction* const& ff, ffs) {
+          map[ff->GetScoreProducerDescription()] = ff;
+        }
+      }
+
+      string allValues = xmlrpc_c::value_string(si->second);
+
+      BOOST_FOREACH(string values, TokenizeMultiCharSeparator(allValues, "\t")) {
+        vector<string> record = TokenizeMultiCharSeparator(values, "=");
+
+        if (record.size() == 2) {
+          string featureName = record[0];
+          string featureWeights = record[1];
+
+          boost::unordered_map<string, FeatureFunction*>::iterator ffi = map.find(featureName);
+          if (ffi != map.end()) {
+            FeatureFunction* ff = ffi->second;
+
+            vector<float> ffWeights;
+            BOOST_FOREACH(string weight, TokenizeMultiCharSeparator(featureWeights, " ")) {
+              ffWeights.push_back(Scan<float>(weight));
+            }
+
+            if (ffWeights.size() == ff->GetNumScoreComponents()) {
+              // XXX: This is NOT thread-safe
+              Moses::StaticData::InstanceNonConst().SetWeights(ff, ffWeights);
+              VERBOSE(1, "WARNING: THIS IS NOT THREAD-SAFE!\tUpdating weights for " << featureName << " to " << featureWeights << "\n");
+            } else {
+              TRACE_ERR("ERROR: Unable to update weights for " << featureName << " because " << ff->GetNumScoreComponents() << " weights are required but only " << ffWeights.size() << " were provided\n");
+            }
+
+          } else {
+            TRACE_ERR("ERROR: No FeatureFunction with name " << featureName << ", no weight update\n");
+          }
+
+        } else {
+          TRACE_ERR("WARNING: XML-RPC weights update was improperly formatted:\t" << values << "\n");
+        }
+      }
+    }
+
+  // // biased sampling for suffix-array-based sampling phrase table?
+  // if ((si = params.find("bias")) != params.end())
+  //   {
+  // 	std::vector<xmlrpc_c::value> tmp
+  // 	  = xmlrpc_c::value_array(si->second).cvalue();
+  // 	for (size_t i = 1; i < tmp.size(); i += 2)
+  // 	  m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
+  //   }
+  // The input is wrapped in a Sentence whichever search algorithm is selected.
+  m_source.reset(new Sentence(m_options,0,m_source_string));
+  interpret_dlt();
+} // end of TranslationRequest::parse_request()
+
+
+/// Decodes with a ChartManager, stores the best output (empty if there is no
+/// best hypothesis) under "text", and the search graph under "sg" on request.
+void
+TranslationRequest::
+run_chart_decoder()
+{
+  Moses::ChartManager chart(this->self());
+  chart.Decode();
+
+  ostringstream text;
+  if (Moses::ChartHypothesis const* const best = chart.GetBestHypothesis())
+    outputChartHypo(text, best);
+  m_target_string = text.str();
+  m_retData["text"] = xmlrpc_c::value_string(m_target_string);
+
+  if (!m_withGraphInfo) return;
+  std::ostringstream graph;
+  chart.OutputSearchGraphMoses(graph);
+  m_retData["sg"] = xmlrpc_c::value_string(graph.str());
+} // end of TranslationRequest::run_chart_decoder()
+
+/// Fills dest[key] with the surface string for edges (which are traversed in
+/// reverse) and, when enabled, the "align" and "word-align" arrays.
+void
+TranslationRequest::
+pack_hypothesis(const Moses::Manager& manager,
+                vector<Hypothesis const* > const& edges, string const& key,
+                map<string, xmlrpc_c::value> & dest) const
+{
+  typedef vector<Hypothesis const*>::const_reverse_iterator edge_iter;
+  // target string
+  ostringstream surface;
+  for (edge_iter e = edges.rbegin(); e != edges.rend(); ++e)
+    manager.OutputSurface(surface, **e);
+  XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis())
+           << std::endl);
+  dest[key] = xmlrpc_c::value_string(surface.str());
+
+  //  if (options()->output.ReportSegmentation) {
+  if (m_withAlignInfo) {
+    // phrase alignment, if requested
+    vector<xmlrpc_c::value> phraseAln;
+    for (edge_iter e = edges.rbegin(); e != edges.rend(); ++e)
+      add_phrase_aln_info(**e, phraseAln);
+    dest["align"] = xmlrpc_c::value_array(phraseAln);
+  }
+  //if (options()->output.PrintAlignmentInfo) {
+  if (m_withWordAlignInfo) {
+    // word alignment, if requested
+    vector<xmlrpc_c::value> wordAln;
+    for (edge_iter e = edges.rbegin(); e != edges.rend(); ++e)
+      (*e)->OutputLocalWordAlignment(wordAln);
+    dest["word-align"] = xmlrpc_c::value_array(wordAln);
+  }
+}
+
+/// Collects h and its chain of predecessors (h first) and packs them via the
+/// edge-list overload; a NULL h yields an empty list.
+void
+TranslationRequest::
+pack_hypothesis(const Moses::Manager& manager, Hypothesis const* h, string const& key,
+                map<string, xmlrpc_c::value>& dest) const
+{
+  std::vector<Hypothesis const*> chain;
+  while (h) { chain.push_back(h); h = h->GetPrevHypo(); }
+  pack_hypothesis(manager, chain, key, dest);
+}
+
+
+/// Decodes with a Manager and fills m_retData: the best translation under
+/// "text", the session id when it is non-zero, and whichever of search graph,
+/// translation options and n-best list were requested.
+void
+TranslationRequest::
+run_phrase_decoder()
+{
+  Manager decoder(this->self());
+  decoder.Decode();
+  pack_hypothesis(decoder, decoder.GetBestHypothesis(), "text", m_retData);
+  if (m_session_id != 0) m_retData["session-id"] = xmlrpc_c::value_int(m_session_id);
+  if (m_withGraphInfo) insertGraphInfo(decoder, m_retData);
+  if (m_withTopts) insertTranslationOptions(decoder, m_retData);
+  if (m_options->nbest.nbest_size != 0) outputNBest(decoder, m_retData);
+}
+}
diff --git a/mosesdecoder/moses/server/Updater.cpp b/mosesdecoder/moses/server/Updater.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..095af383866f1710ae85e18e9e99f8a78cd36dfd
--- /dev/null
+++ b/mosesdecoder/moses/server/Updater.cpp
@@ -0,0 +1,58 @@
+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+#include "Updater.h"
+
+namespace MosesServer
+{
+using namespace Moses;
+using namespace std;
+
+/// Sets the introspection strings of this method: the signature "S:S" and a
+/// short help text.
+Updater::Updater()
+{
+  // Clients can query both strings through the system.methodSignature and
+  // system.methodHelp RPCs; they are documentation only.
+  _signature = "S:S";
+  _help = "Updates stuff";
+}
+
+/// Adds the request's sentence pair to the first phrase table; without PT_UG this is a no-op that leaves *retvalP unset.
+void Updater::execute(xmlrpc_c::paramList const& paramList,
+                      xmlrpc_c::value *   const  retvalP)
+{
+#if PT_UG
+  breakOutParams(paramList.getStruct(0)); // throws xmlrpc_c::fault on a missing field
+  std::vector<PhraseDictionary*> const& pts = PhraseDictionary::GetColl();
+  Mmsapt* pdsa = pts.empty() ? static_cast<Mmsapt*>(NULL) : dynamic_cast<Mmsapt*>(pts[0]);
+  if (!pdsa) throw xmlrpc_c::fault("No updatable phrase table", xmlrpc_c::fault::CODE_UNSPECIFIED);
+  pdsa->add(m_src, m_trg, m_aln);
+  XVERBOSE(1,"Done inserting\n");
+  *retvalP = xmlrpc_c::value_string("Phrase table updated");
+#endif
+}
+
+/// Copies "source", "target" and "alignment" from params into the members
+/// (throwing xmlrpc_c::fault with CODE_PARSE if one is missing) and records
+/// whether the keys "bounded" and "updateORLM" are present at all.
+void
+Updater::
+breakOutParams(const params_t& params)
+{
+  params_t::const_iterator const none = params.end();
+  params_t::const_iterator field = params.find("source");
+  if (field == none) throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
+  m_src = xmlrpc_c::value_string(field->second);
+  XVERBOSE(1,"source = " << m_src << endl);
+  field = params.find("target");
+  if (field == none) throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
+  m_trg = xmlrpc_c::value_string(field->second);
+  XVERBOSE(1,"target = " << m_trg << endl);
+  field = params.find("alignment");
+  if (field == none) throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
+  m_aln = xmlrpc_c::value_string(field->second);
+  XVERBOSE(1,"alignment = " << m_aln << endl);
+  m_bounded  = params.find("bounded") != none;
+  m_add2ORLM = params.find("updateORLM") != none;
+}
+
+}
diff --git a/mosesdecoder/moses/server/Updater.h b/mosesdecoder/moses/server/Updater.h
new file mode 100644
index 0000000000000000000000000000000000000000..e3eba52ef1edfc746befb78bcad5175f95d4ff44
--- /dev/null
+++ b/mosesdecoder/moses/server/Updater.h
@@ -0,0 +1,44 @@
+// -*- c++ -*-
+#pragma once
+
+#include "moses/Util.h"
+#include "moses/ChartManager.h"
+#include "moses/Hypothesis.h"
+#include "moses/Manager.h"
+#include "moses/StaticData.h"
+#include "moses/ThreadPool.h"
+
+#if PT_UG
+#include "moses/TranslationModel/UG/mmsapt.h"
+#endif
+
+#include <xmlrpc-c/base.hpp>
+#include <xmlrpc-c/registry.hpp>
+#include <xmlrpc-c/server_abyss.hpp>
+
+
+namespace MosesServer
+{
+/// XML-RPC method object; it keeps the source sentence, target sentence and
+/// alignment strings taken from a request, plus two presence flags.
+class Updater : public xmlrpc_c::method
+{
+  typedef std::map<std::string, xmlrpc_c::value> params_t;
+
+  // Request fields; breakOutParams() is declared below to fill them in.
+  std::string m_src;
+  std::string m_trg;
+  std::string m_aln;
+  bool m_bounded;
+  bool m_add2ORLM;
+
+public:
+  Updater();
+
+  void execute(xmlrpc_c::paramList const& paramList,
+               xmlrpc_c::value * const  retvalP);
+
+  void breakOutParams(const params_t& params);
+};
+
+}
diff --git a/mosesdecoder/util/bit_packing_test.cc b/mosesdecoder/util/bit_packing_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c4494b69fa3bf42506ded3bdc206f77cbc45a486
--- /dev/null
+++ b/mosesdecoder/util/bit_packing_test.cc
@@ -0,0 +1,59 @@
+#include "util/bit_packing.hh"
+
+#define BOOST_TEST_MODULE BitPackingTest
+#include <boost/test/unit_test.hpp>
+
+#include <cstring>
+
+namespace util {
+namespace {
+
+const uint64_t test57 = 0x123456789abcdefULL;
+const uint32_t test25 = 0x1234567;
+
+BOOST_AUTO_TEST_CASE(ZeroBit57) { // round trip of a 57-bit value at bit offset 0
+  char buffer[16] = {0};
+  const unsigned long long mask57 = (1ULL << 57) - 1;
+  WriteInt57(buffer, 0, 57, test57);
+  BOOST_CHECK_EQUAL(test57, ReadInt57(buffer, 0, 57, mask57));
+}
+
+BOOST_AUTO_TEST_CASE(EachBit57) { // round trip at each bit offset 0..7
+  const unsigned long long mask57 = (1ULL << 57) - 1;
+  for (uint8_t offset = 0; offset != 8; ++offset) {
+    char buffer[16] = {0}; // fresh zeroed buffer for every offset
+    WriteInt57(buffer, offset, 57, test57);
+    BOOST_CHECK_EQUAL(test57, ReadInt57(buffer, offset, 57, mask57));
+  }
+}
+
+BOOST_AUTO_TEST_CASE(Consecutive57) { // eight adjacent 57-bit values, checked on write and again afterwards
+  char buffer[57+8] = {0};
+  const unsigned long long mask57 = (1ULL << 57) - 1;
+  for (uint64_t bit = 0; bit < 57 * 8; bit += 57) {
+    WriteInt57(buffer, bit, 57, test57);
+    BOOST_CHECK_EQUAL(test57, ReadInt57(buffer, bit, 57, mask57));
+  }
+  for (uint64_t bit = 0; bit < 57 * 8; bit += 57) {
+    BOOST_CHECK_EQUAL(test57, ReadInt57(buffer, bit, 57, mask57));
+  }
+}
+
+BOOST_AUTO_TEST_CASE(Consecutive25) { // eight adjacent 25-bit values, checked on write and again afterwards
+  char buffer[25+8] = {0};
+  const unsigned long long mask25 = (1ULL << 25) - 1;
+  for (uint64_t bit = 0; bit < 25 * 8; bit += 25) {
+    WriteInt25(buffer, bit, 25, test25);
+    BOOST_CHECK_EQUAL(test25, ReadInt25(buffer, bit, 25, mask25));
+  }
+  for (uint64_t bit = 0; bit < 25 * 8; bit += 25) {
+    BOOST_CHECK_EQUAL(test25, ReadInt25(buffer, bit, 25, mask25));
+  }
+}
+
+BOOST_AUTO_TEST_CASE(Sanity) { // just calls BitPackingSanity(); presumably a self-check of the bit-packing code -- confirm
+  BitPackingSanity();
+}
+
+} // namespace
+} // namespace util
diff --git a/mosesdecoder/util/ersatz_progress.hh b/mosesdecoder/util/ersatz_progress.hh
new file mode 100644
index 0000000000000000000000000000000000000000..b47aded7d9c6ff8d3e3a248e3c6a0b0c2e075246
--- /dev/null
+++ b/mosesdecoder/util/ersatz_progress.hh
@@ -0,0 +1,57 @@
+#ifndef UTIL_ERSATZ_PROGRESS_H
+#define UTIL_ERSATZ_PROGRESS_H
+
+#include <iostream>
+#include <string>
+#include <stdint.h>
+
+// Ersatz version of boost::progress so core language model doesn't depend on
+// boost.  Also adds option to print nothing.
+
+namespace util {
+
+extern const char kProgressBanner[];
+
+class ErsatzProgress {
+  public:
+    // Constructs a progress object that produces no output.
+    ErsatzProgress();
+
+    // A null `to` also means no output, which lets a caller forward an optional ostream pointer unchanged.
+    explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+
+    ~ErsatzProgress();
+
+    // Each mutator below calls Milestone() once current_ has reached next_.
+    ErsatzProgress &operator++() {
+      ++current_;
+      if (current_ >= next_) Milestone();
+      return *this;
+    }
+
+    ErsatzProgress &operator+=(uint64_t amount) {
+      current_ += amount;
+      if (current_ >= next_) Milestone();
+      return *this;
+    }
+
+    void Set(uint64_t to) {
+      current_ = to;
+      if (current_ >= next_) Milestone();
+    }
+    void Finished() { Set(complete_); } // jump straight to the completion value
+
+  private:
+    void Milestone();
+    uint64_t current_, next_, complete_;
+    unsigned char stones_written_;
+    std::ostream *out_;
+
+    // noncopyable
+    ErsatzProgress(const ErsatzProgress &other);
+    ErsatzProgress &operator=(const ErsatzProgress &other);
+};
+
+} // namespace util
+
+#endif // UTIL_ERSATZ_PROGRESS_H
diff --git a/mosesdecoder/util/exception.hh b/mosesdecoder/util/exception.hh
new file mode 100644
index 0000000000000000000000000000000000000000..b30183e7f913d307cce23be78fb22ac564fe4b0b
--- /dev/null
+++ b/mosesdecoder/util/exception.hh
@@ -0,0 +1,165 @@
+#ifndef UTIL_EXCEPTION_H
+#define UTIL_EXCEPTION_H
+
+#include "util/string_stream.hh"
+
+#include <exception>
+#include <limits>
+#include <string>
+#include <stdint.h>
+
+// TODO(hieu): delete this
+#include <sstream>
+
+namespace util {
+
+template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
+
+class Exception : public std::exception {
+  public:
+    Exception() throw();
+    virtual ~Exception() throw();
+    // C-string view of what_.  NOTE(review): valid only if StringStream::str() returns a reference, not a temporary -- confirm.
+    const char *what() const throw() { return what_.str().c_str(); }
+
+    // For use by the UTIL_THROW macros.
+    void SetLocation(
+        const char *file,
+        unsigned int line,
+        const char *func,
+        const char *child_name,
+        const char *condition);
+
+  private:
+    template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
+
+    // This helps restrict operator<< defined below.
+    template <class T> struct ExceptionTag {
+      typedef T Identity;
+    };
+    // Message buffer: the friend operator<< appends to it and what() reads it.
+    StringStream what_;
+};
+
+/* Appends data to e's what() message and returns e, so calls can be chained.
+ * The return type only exists for classes that provide ExceptionTag (Exception
+ * and its children), so SFINAE drops this overload for every other type --
+ * think of this as an ersatz boost::enable_if. */
+template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
+  // TODO(hieu): delete this.  data is rendered by a std::stringstream first, then appended to what_ as a string.
+  std::stringstream moses_hack;
+  moses_hack << data;
+  e.what_ << moses_hack.str();
+  return e;
+}
+
+#ifdef __GNUC__ // UTIL_FUNC_NAME: name of the enclosing function, passed to SetLocation by UTIL_THROW_BACKEND
+#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
+#else
+#ifdef _WIN32
+#define UTIL_FUNC_NAME __FUNCTION__
+#else // neither __GNUC__ nor _WIN32: no function name is recorded
+#define UTIL_FUNC_NAME NULL
+#endif // _WIN32
+#endif // __GNUC__
+
+/* Create an instance of Exception (constructed with the optional Arg), record
+ * file, line, function, exception name and Condition via SetLocation, append
+ * Modify to the what() message, and throw it.  Modify can contain << for
+ * ostream operations.
+ *
+ * do .. while kludge to swallow trailing ; character
+ * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
+ */
+#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
+  Exception UTIL_e Arg; \
+  UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
+  UTIL_e << Modify; \
+  throw UTIL_e; \
+} while (0)
+
+#define UTIL_THROW_ARG(Exception, Arg, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
+// NOTE: UTIL_THROW and UTIL_THROW2 expand with their own trailing ';', so `if (c) UTIL_THROW(...); else ...` without braces does not compile.
+#define UTIL_THROW(Exception, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, , Modify);
+
+#define UTIL_THROW2(Modify) \
+  UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
+
+#if __GNUC__ >= 3 // UTIL_UNLIKELY(x): tells GCC 3+ that x is expected to be false
+#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
+#else
+#define UTIL_UNLIKELY(x) (x)
+#endif
+
+#if __GNUC__ >= 3 // UTIL_LIKELY(x): tells GCC 3+ that x is expected to be true
+#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)
+#else
+#define UTIL_LIKELY(x) (x)
+#endif
+
+#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
+  if (UTIL_UNLIKELY(Condition)) { \
+    UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
+  } \
+} while (0)
+// The shorthands below pass no constructor argument; the "2" form also fixes the type to util::Exception.
+#define UTIL_THROW_IF(Condition, Exception, Modify) \
+  UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
+
+#define UTIL_THROW_IF2(Condition, Modify) \
+  UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
+
+// Exception that records errno and adds it to the message.
+class ErrnoException : public Exception {
+  public:
+    ErrnoException() throw();
+
+    virtual ~ErrnoException() throw();
+    // Returns the stored errno_ value.
+    int Error() const throw() { return errno_; }
+
+  private:
+    int errno_;
+};
+
+// Thrown when a file was not there or could not be opened for some other reason.
+class FileOpenException : public Exception {
+  public:
+    FileOpenException() throw() {}
+    ~FileOpenException() throw() {}
+};
+
+// Utilities for overflow checking.
+class OverflowException : public Exception {
+  public:
+    OverflowException() throw();
+    ~OverflowException() throw();
+};
+// Generic case: throws OverflowException when value exceeds the largest std::size_t, else returns it.
+template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
+  UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected.  This model is too big for 32-bit code.");
+  return value;
+}
+// Specialization for len == 8: returns value without any check.
+template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+  return value;
+}
+// Converts value to std::size_t using the variant selected by sizeof(std::size_t).
+inline std::size_t CheckOverflow(uint64_t value) {
+  return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+/* Thrown for Windows specific operations. */
+class WindowsException : public Exception {
+  public:
+    WindowsException() throw();
+    ~WindowsException() throw();
+};
+#endif // _WIN32 || _WIN64
+
+} // namespace util
+
+#endif // UTIL_EXCEPTION_H
diff --git a/mosesdecoder/util/fake_ostream.hh b/mosesdecoder/util/fake_ostream.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2f76053cc583525bf67f77c51afca0da588db4f3
--- /dev/null
+++ b/mosesdecoder/util/fake_ostream.hh
@@ -0,0 +1,111 @@
+#ifndef UTIL_FAKE_OSTREAM_H
+#define UTIL_FAKE_OSTREAM_H
+
+#include "util/float_to_string.hh"
+#include "util/integer_to_string.hh"
+#include "util/string_piece.hh"
+
+#include <cassert>
+#include <limits>
+
+#include <stdint.h>
+
+namespace util {
+
+/* Like std::ostream but without being incredibly slow.
+ * Supports most of the built-in types except for long double.
+ * 
+ * The FakeOStream class is intended to be inherited from.  The inheriting class
+ * should provide:
+ * public:
+ *   Derived &flush();
+ *   Derived &write(const void *data, std::size_t length);
+ * 
+ * private: or protected:
+ *   friend class FakeOStream;
+ *   char *Ensure(std::size_t amount);
+ *   void AdvanceTo(char *to);
+ *
+ * The Ensure function makes enough space for an in-place write and returns
+ * where to write.  The AdvanceTo function happens after the write, saying how
+ * much was actually written.
+ * 
+ * Precondition:
+ * amount <= kToStringMaxBytes for in-place writes.
+ */
+template <class Derived> class FakeOStream {
+  public:
+    FakeOStream() {}
+
+    // This also covers std::string and char*
+    Derived &operator<<(StringPiece str) {
+      return C().write(str.data(), str.size()); // forwarded to the derived class's write()
+    }
+
+    // Handle integers by size and signedness.
+  private:
+    template <class Arg> struct EnableIfKludge { // exists only so the operator<< return type below mentions Coerce<From>::To
+      typedef Derived type;
+    };
+    template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {}; // primary template has no To member
+
+    template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
+    template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
+    template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
+
+    template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
+    template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
+    template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
+  public:
+    template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
+      return CallToString(static_cast<typename Coerce<From>::To>(value)); // cast to the fixed-width type of the same size and signedness
+    }
+
+    // Character types that get copied as bytes instead of displayed as integers.
+    Derived &operator<<(char val) { return put(val); }
+    Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
+    Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
+
+    Derived &operator<<(bool val) { return put(val + '0'); } // writes the character '1' for true, '0' for false
+    // enums will fall back to int but are not caught by the template.
+    Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
+
+    Derived &operator<<(float val) { return CallToString(val); }
+    Derived &operator<<(double val) { return CallToString(val); }
+
+    // This is here to catch all the other pointer types.
+    Derived &operator<<(const void *value) { return CallToString(value); }
+    // This is here because the above line also catches const char*.
+    Derived &operator<<(const char *value) { return *this << StringPiece(value); }
+    Derived &operator<<(char *value) { return *this << StringPiece(value); }
+
+    Derived &put(char val) { // appends a single byte
+      char *c = C().Ensure(1); // ask the derived class for room for one byte
+      *c = val;
+      C().AdvanceTo(++c); // report the position just past the written byte
+      return C();
+    }
+
+    char widen(char val) const { return val; } // identity: returns val unchanged
+
+  private:
+    // References to derived class for convenience.
+    Derived &C() {
+      return *static_cast<Derived*>(this);
+    }
+
+    const Derived &C() const {
+      return *static_cast<const Derived*>(this);
+    }
+
+    // This is separate to prevent an infinite loop if the compiler considers
+    // types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
+    template <class T> Derived &CallToString(const T value) {
+      C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf<T>::kBytes))); // reserve kBytes, write in place, advance to ToString's return value
+      return C();
+    }
+};
+
+} // namespace
+
+#endif // UTIL_FAKE_OSTREAM_H
diff --git a/mosesdecoder/util/file_piece.hh b/mosesdecoder/util/file_piece.hh
new file mode 100644
index 0000000000000000000000000000000000000000..d3d83054d6e6ccc06970b82708d45e69433ca80c
--- /dev/null
+++ b/mosesdecoder/util/file_piece.hh
@@ -0,0 +1,175 @@
+#ifndef UTIL_FILE_PIECE_H
+#define UTIL_FILE_PIECE_H
+
+#include "util/ersatz_progress.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/mmap.hh"
+#include "util/read_compressed.hh"
+#include "util/string_piece.hh"
+
+#include <cstddef>
+#include <iosfwd>
+#include <string>
+#include <cassert>
+#include <stdint.h>
+
+namespace util {
+
+class ParseNumberException : public Exception { // constructed from a piece of text; presumably the text that failed to parse -- confirm in the .cc
+  public:
+    explicit ParseNumberException(StringPiece value) throw(); // defined out of line
+    ~ParseNumberException() throw() {}
+};
+
+extern const bool kSpaces[256];
+
+// Memory backing the returned StringPiece may vanish on the next call.
+class FilePiece {
+  public:
+    // 1 MB default.
+    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+    // Takes ownership of fd.  name is used for messages.
+    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+
+    /* Read from an istream.  Don't use this if you can avoid it.  Raw fd IO is
+     * much faster.  But sometimes you just have an istream like Boost's HTTP
+     * server and want to parse it the same way.
+     * name is just used for messages and FileName().
+     */
+    explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
+
+    ~FilePiece();
+
+    char get() { // returns the next character and advances past it
+      if (position_ == position_end_) { // current buffer exhausted
+        Shift();
+        if (at_end_) throw EndOfFileException();
+      }
+      return *(position_++);
+    }
+
+    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().
+    StringPiece ReadDelimited(const bool *delim = kSpaces) {
+      SkipSpaces(delim); // drop leading delimiters first
+      return Consume(FindDelimiterOrEOF(delim));
+    }
+
+    /// Read word until the line or file ends.
+    bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
+      assert(delim[static_cast<unsigned char>('\n')]); // '\n' must be one of the delimiters
+      // Skip non-enter spaces.
+      for (; ; ++position_) {
+        if (position_ == position_end_) {
+          try {
+            Shift();
+          } catch (const util::EndOfFileException &e) { return false; } // end of file reported as "no word"
+          // And break out at end of file.
+          if (position_ == position_end_) return false;
+        }
+        if (!delim[static_cast<unsigned char>(*position_)]) break; // found the start of a word
+        if (*position_ == '\n') return false; // line ended before any word; the '\n' is not consumed
+      }
+      // We can't be at the end of file because there's at least one character open.
+      to = Consume(FindDelimiterOrEOF(delim));
+      return true;
+    }
+
+    /** Read a line of text from the file.
+     *
+     * Unlike ReadDelimited, this includes leading spaces and consumes the
+     * delimiter.   It is similar to getline in that way.
+     *
+     * If strip_cr is true, any trailing carriage return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     *
+     * Throws EndOfFileException if the end of the file is encountered.  If the
+     * file does not end in a newline, this could mean that the last line is
+     * never read.
+     */
+    StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
+
+    /** Read a line of text from the file, or return false on EOF.
+     *
+     * This is like ReadLine, except it returns false where ReadLine throws
+     * EndOfFileException.  Like ReadLine it may not read the last line in the
+     * file if the file does not end in a newline.
+     *
+     * If strip_cr is true, any trailing carriage return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     */
+    bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
+
+    float ReadFloat();
+    double ReadDouble();
+    long int ReadLong();
+    unsigned long int ReadULong();
+
+    // Skip spaces defined by isspace.
+    void SkipSpaces(const bool *delim = kSpaces) {
+      assert(position_ <= position_end_);
+      for (; ; ++position_) {
+        if (position_ == position_end_) {
+          Shift();
+          // And break out at end of file.
+          if (position_ == position_end_) return;
+        }
+        assert(position_ < position_end_);
+        if (!delim[static_cast<unsigned char>(*position_)]) return; // stop at the first non-delimiter byte
+      }
+    }
+
+    uint64_t Offset() const {
+      return position_ - data_.begin() + mapped_offset_; // position within the current buffer plus mapped_offset_
+    }
+
+    const std::string &FileName() const { return file_name_; }
+
+  private:
+    void InitializeNoRead(const char *name, std::size_t min_buffer);
+    // Calls InitializeNoRead, so don't call both.
+    void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
+
+    template <class T> T ReadNumber();
+
+    StringPiece Consume(const char *to) { // returns [position_, to) and moves position_ to to
+      assert(to >= position_);
+      StringPiece ret(position_, to - position_);
+      position_ = to;
+      return ret;
+    }
+
+    const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
+
+    void Shift();
+    // Backends to Shift().
+    void MMapShift(uint64_t desired_begin);
+
+    void TransitionToRead();
+    void ReadShift();
+
+    const char *position_, *last_space_, *position_end_;
+
+    scoped_fd file_;
+    const uint64_t total_size_;
+    const uint64_t page_;
+
+    std::size_t default_map_size_;
+    uint64_t mapped_offset_;
+
+    // Order matters: file_ should always be destroyed after this.
+    scoped_memory data_;
+
+    bool at_end_;
+    bool fallback_to_read_;
+
+    ErsatzProgress progress_;
+
+    std::string file_name_;
+
+    ReadCompressed fell_back_;
+};
+
+} // namespace util
+
+#endif // UTIL_FILE_PIECE_H
diff --git a/mosesdecoder/util/file_piece_test.cc b/mosesdecoder/util/file_piece_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d03cd312d0d7622fdee3af09e5063fb5d7591948
--- /dev/null
+++ b/mosesdecoder/util/file_piece_test.cc
@@ -0,0 +1,154 @@
+// Tests might fail if you have creative characters in your path.  Sue me.
+#include "util/file_piece.hh"
+
+#include "util/file_stream.hh"
+#include "util/file.hh"
+#include "util/scoped.hh"
+
+#define BOOST_TEST_MODULE FilePieceTest
+#include <boost/test/unit_test.hpp>
+#include <fstream>
+#include <iostream>
+#include <cstdio>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+namespace util {
+namespace {
+
+std::string FileLocation() { // path of the reference text file the tests read
+  if (boost::unit_test::framework::master_test_suite().argc < 2) {
+    return "file_piece.cc"; // default when no path argument was passed to the test binary
+  }
+  std::string ret(boost::unit_test::framework::master_test_suite().argv[1]); // otherwise use the first argument
+  return ret;
+}
+
+/* istream */
+BOOST_AUTO_TEST_CASE(IStream) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in); // reference reader using getline
+  std::fstream backing(FileLocation().c_str(), std::ios::in); // second stream over the same file, handed to FilePiece
+  FilePiece test(backing);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    BOOST_CHECK_EQUAL(ref_line, test_line); // every line must match the getline result
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_CHECK_THROW(test.get(), EndOfFileException); // a repeated get() at end of file must throw again
+}
+
+/* mmap implementation */
+BOOST_AUTO_TEST_CASE(MMapReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  FilePiece test(FileLocation().c_str(), NULL, 1); // opened by name, no progress stream, min_buffer of 1
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    if (!test_line.empty() || !ref_line.empty()) { // the comparison is skipped when both lines are empty
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
+/* Apple isn't happy with the popen, fileno, dup.  And I don't want to
+ * reimplement popen.  This is an issue with the test.
+ */
+/* read() implementation */
+BOOST_AUTO_TEST_CASE(StreamReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+  std::string popen_args = "cat \""; // builds: cat "<file>"
+  popen_args += FileLocation();
+  popen_args += '"';
+
+  FILE *catter = popen(popen_args.c_str(), "r");
+  BOOST_REQUIRE(catter);
+
+  FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1); // FilePiece owns the dup'ed fd; catter is closed separately below
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_REQUIRE(!pclose(catter)); // pclose must return 0
+}
+#endif // !_WIN32 && !_WIN64 && !__APPLE__
+
+#ifdef HAVE_ZLIB
+
+// gzip file
+BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
+  std::string location(FileLocation());
+  std::fstream ref(location.c_str(), std::ios::in);
+
+  std::string command("gzip <\""); // builds: gzip <"<file>" >"<file>".gz
+  command += location + "\" >\"" + location + "\".gz";
+
+  BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+  FilePiece test((location + ".gz").c_str(), NULL, 1);
+  unlink((location + ".gz").c_str()); // removed right after FilePiece has been constructed on it
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
+// gzip stream.  Apple doesn't like popen, fileno, dup.  This is an issue with
+// the test.
+#if !defined __APPLE__ && !defined __MINGW32__
+BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+
+  std::string command("gzip <\""); // builds: gzip <"<file>", compressed output goes to the pipe
+  command += FileLocation() + "\"";
+
+  FILE * catter = popen(command.c_str(), "r");
+  BOOST_REQUIRE(catter);
+
+  FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1); // FilePiece owns the dup'ed fd; the name is only a label
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_REQUIRE(!pclose(catter)); // pclose must return 0
+}
+#endif // !__APPLE__ && !__MINGW32__
+
+#endif // HAVE_ZLIB
+
+BOOST_AUTO_TEST_CASE(Numbers) {
+  scoped_fd file(MakeTemp(FileLocation())); // temporary file whose name is derived from the reference path
+  const float floating = 3.2;
+  {
+    util::FileStream writing(file.get()); // scoped so the stream is destroyed before the file is read back
+    writing << "94389483984398493890287 " << floating << " 5";
+  }
+  SeekOrThrow(file.get(), 0); // rewind to the start
+  util::FilePiece f(file.release()); // FilePiece takes ownership of the descriptor
+  BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException); // first token is expected to be rejected by ReadULong
+  BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited()); // the failed parse must leave the token readable
+  // Yes, exactly equal.  Isn't double-conversion wonderful?
+  BOOST_CHECK_EQUAL(floating, f.ReadFloat());
+  BOOST_CHECK_EQUAL(5, f.ReadULong());
+}
+
+} // namespace
+} // namespace util
diff --git a/mosesdecoder/util/generator.hh b/mosesdecoder/util/generator.hh
new file mode 100644
index 0000000000000000000000000000000000000000..afa0db611c41dc6b9f33472b1e6ed54661f195d5
--- /dev/null
+++ b/mosesdecoder/util/generator.hh
@@ -0,0 +1,34 @@
+#pragma once
+
+// generator/continuation for C++
+// author: Andrew Fedoniouk @ terrainformatica.com
+// idea borrowed from: "coroutines in C" Simon Tatham,
+//                     http://www.chiark.greenend.org.uk/~sgtatham/coroutines.html
+// BSD license
+
+// CRTP base for $generator structs: keeps the resume line plus a linked stack of saved copies of the derived object.
+template<typename T>
+  struct _generator {
+    T* _stack;   // most recently saved copy of the derived object, or 0 when nothing is saved
+    int _line;   // resume point for the switch opened by $emit; -1 until the first call
+    _generator() : _stack(0), _line(-1) {}
+    void _push() { T* self = static_cast<T*>(this); T* saved = new T; *saved = *self; _stack = saved; }
+    bool _pop() { T* top = _stack; if(top == 0) return false; *static_cast<T*>(this) = *top; top->_stack = 0; delete top; return true; }
+    ~_generator() { while(_pop()) {} }
+  };
+
+  #define $generator(NAME) struct NAME : public _generator<NAME> // opens a struct NAME deriving from _generator<NAME>
+
+  #define $emit(T) bool operator()(T& _rv) { \
+                      if(_line < 0) _line=0; \
+                      $START: switch(_line) { case 0:; // opens operator() and the resume switch; both are closed by $stop
+
+  #define $stop  } _line = 0; if(_pop()) goto $START; return false; } // closes the switch; re-enters it if a saved state was popped, else returns false
+
+  #define $restart(WITH) { _push(); _stack->_line = __LINE__; _line=0; WITH; goto $START; case __LINE__:; } // saves state with this line as its resume point, runs WITH, restarts at case 0
+
+  #define $yield(V)     \
+          do {\
+              _line=__LINE__;\
+              _rv = (V); return true; case __LINE__:;\
+          } while (0) // stores V in _rv and returns true; the case label is where the next call resumes
diff --git a/mosesdecoder/util/getopt.c b/mosesdecoder/util/getopt.c
new file mode 100644
index 0000000000000000000000000000000000000000..50eef42cc25f0e0da6b6ffa00f04a92b1a4383c6
--- /dev/null
+++ b/mosesdecoder/util/getopt.c
@@ -0,0 +1,78 @@
+/*
+POSIX getopt for Windows
+
+AT&T Public License
+
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+
+#ifndef __GNUC__
+
+#include "getopt.hh"
+#include <stdio.h>
+#include <string.h>
+
+#define NULL	0	/* NOTE(review): defined after <stdio.h>/<string.h> were included, so this is a redefinition */
+#define EOF	(-1)	/* NOTE(review): likewise defined after <stdio.h> was included */
+#define ERR(s, c)	if(opterr){\
+	char errbuf[2];\
+	errbuf[0] = c; errbuf[1] = '\n';\
+	fputs(argv[0], stderr);\
+	fputs(s, stderr);\
+	fputc(c, stderr);}
+	//(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
+	//(void) write(2, s, (unsigned)strlen(s));\
+	//(void) write(2, errbuf, 2);}
+
+int	opterr = 1;	/* nonzero: the ERR macro prints diagnostics to stderr */
+int	optind = 1;	/* index of the argv element getopt examines next */
+int	optopt;	/* set by getopt to the option character it just examined */
+char	*optarg;	/* set by getopt to the option's argument, or NULL for options without one */
+
+int
+getopt(argc, argv, opts)	/* K&R-style definition: the parameter types follow on the next two lines */
+int	argc;
+char	**argv, *opts;
+{
+	static int sp = 1;	/* index of the next option character inside argv[optind]; kept between calls */
+	register int c;
+	register char *cp;
+
+	if(sp == 1)	/* about to start a new argv element */
+		if(optind >= argc ||
+		   argv[optind][0] != '-' || argv[optind][1] == '\0')
+			return(EOF);	/* out of arguments, or not an option: no leading '-', or a bare "-" */
+		else if(strcmp(argv[optind], "--") == NULL) {	/* NOTE(review): compares strcmp's int result with NULL; works because NULL is #defined as 0 in this file */
+			optind++;	/* skip the "--" itself */
+			return(EOF);
+		}
+	optopt = c = argv[optind][sp];
+	if(c == ':' || (cp=strchr(opts, c)) == NULL) {	/* ':' is never a valid option; otherwise c must appear in opts */
+		ERR(": illegal option -- ", c);
+		if(argv[optind][++sp] == '\0') {	/* end of this argv element: move on to the next one */
+			optind++;
+			sp = 1;
+		}
+		return('?');
+	}
+	if(*++cp == ':') {	/* a ':' after the option letter in opts means the option takes an argument */
+		if(argv[optind][sp+1] != '\0')
+			optarg = &argv[optind++][sp+1];	/* argument is the rest of the same argv element */
+		else if(++optind >= argc) {
+			ERR(": option requires an argument -- ", c);
+			sp = 1;
+			return('?');
+		} else
+			optarg = argv[optind++];	/* argument is the following argv element */
+		sp = 1;
+	} else {
+		if(argv[optind][++sp] == '\0') {	/* last letter in this group of flags */
+			sp = 1;
+			optind++;
+		}
+		optarg = NULL;
+	}
+	return(c);
+}
+
+#endif  /* __GNUC__ */
diff --git a/mosesdecoder/util/integer_to_string_test.cc b/mosesdecoder/util/integer_to_string_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..136c88f62111e13580c696e63ea51da5da799b06
--- /dev/null
+++ b/mosesdecoder/util/integer_to_string_test.cc
@@ -0,0 +1,81 @@
+#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
+#include "util/integer_to_string.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_TEST_MODULE IntegerToStringTest
+#include <boost/test/unit_test.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <limits>
+
+namespace util {
+namespace {
+
+template <class T> void TestValue(const T value) { // compares ToString(value) with boost::lexical_cast's rendering
+  char buf[ToStringBuf<T>::kBytes];
+  StringPiece result(buf, ToString(value, buf) - buf); // the returned pointer minus buf is taken as the written length
+  BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size()); // output must fit in the declared buffer size
+  if (value) {
+    BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
+  } else {
+    // Platforms can do void * as 0x0 or 0.
+    BOOST_CHECK(result == "0x0" || result == "0");
+  }
+}
+
+template <class T> void TestCorners() { // boundary values: min, max, 0, -1 cast to T, and 1
+  TestValue(std::numeric_limits<T>::min());
+  TestValue(std::numeric_limits<T>::max());
+  TestValue((T)0);
+  TestValue((T)-1); // for unsigned T this is the all-ones value
+  TestValue((T)1);
+}
+
+BOOST_AUTO_TEST_CASE(Corners) { // corner values for every fixed-width integer type plus const void*
+  TestCorners<uint16_t>();
+  TestCorners<uint32_t>();
+  TestCorners<uint64_t>();
+  TestCorners<int16_t>();
+  TestCorners<int32_t>();
+  TestCorners<int64_t>();
+  TestCorners<const void*>();
+}
+
+template <class T> void TestAll() { // exhaustively tests every value of T
+  for (T i = std::numeric_limits<T>::min(); i < std::numeric_limits<T>::max(); ++i) { // stops before max() so ++i never steps past it
+    TestValue(i);
+  }
+  TestValue(std::numeric_limits<T>::max()); // max() itself is covered here
+}
+
+BOOST_AUTO_TEST_CASE(Short) { // exhaustive runs are limited to the 16-bit types
+  TestAll<uint16_t>();
+  TestAll<int16_t>();
+}
+
+template <class T> void Test10s() { // tests each power of ten and its two neighbours
+  for (T i = 1; i < std::numeric_limits<T>::max() / 10; i *= 10) { // bound of max()/10 keeps i *= 10 from overflowing
+    TestValue(i);
+    TestValue(i - 1);
+    TestValue(i + 1);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(Tens) { // powers of ten for the 32- and 64-bit integer types
+  Test10s<uint64_t>();
+  Test10s<int64_t>();
+  Test10s<uint32_t>();
+  Test10s<int32_t>();
+}
+
+BOOST_AUTO_TEST_CASE(Pointers) { // pointer formatting; the pointers are built from integers and never dereferenced
+  for (uintptr_t i = 1; i < std::numeric_limits<uintptr_t>::max() / 10; i *= 10) { // powers of ten as addresses
+    TestValue((const void*)i);
+  }
+  for (uintptr_t i = 0; i < 256; ++i) { // every low-byte value, alone and offset by 0xf00
+    TestValue((const void*)i);
+    TestValue((const void*)(i + 0xf00));
+  }
+}
+
+}} // namespaces
diff --git a/mosesdecoder/util/joint_sort.hh b/mosesdecoder/util/joint_sort.hh
new file mode 100644
index 0000000000000000000000000000000000000000..de4b554ff68c1ff305fe0ea6ea4375d1fdf6cbd6
--- /dev/null
+++ b/mosesdecoder/util/joint_sort.hh
@@ -0,0 +1,146 @@
+#ifndef UTIL_JOINT_SORT_H
+#define UTIL_JOINT_SORT_H
+
+/* A terrifying amount of C++ to coax std::sort into sorting one range while
+ * also permuting another range the same way.
+ */
+
+#include "util/proxy_iterator.hh"
+
+#include <algorithm>
+#include <functional>
+
+namespace util {
+
+namespace detail {
+
+template <class KeyIter, class ValueIter> class JointProxy;
+
+template <class KeyIter, class ValueIter> class JointIter { // a key iterator and a value iterator that move together
+  public:
+    JointIter() {}
+
+    JointIter(const KeyIter &key_iter, const ValueIter &value_iter) : key_(key_iter), value_(value_iter) {}
+
+    bool operator==(const JointIter<KeyIter, ValueIter> &other) const { return key_ == other.key_; } // comparisons look at the key iterator only
+
+    bool operator<(const JointIter<KeyIter, ValueIter> &other) const { return (key_ < other.key_); }
+
+    std::ptrdiff_t operator-(const JointIter<KeyIter, ValueIter> &other) const { return key_ - other.key_; } // distance measured on the key iterator
+
+    JointIter<KeyIter, ValueIter> &operator+=(std::ptrdiff_t amount) { // advances both iterators by the same amount
+      key_ += amount;
+      value_ += amount;
+      return *this;
+    }
+
+    friend void swap(JointIter &first, JointIter &second) { // swaps the iterators themselves, not the elements
+      using std::swap;
+      swap(first.key_, second.key_);
+      swap(first.value_, second.value_);
+    }
+
+    void DeepSwap(JointIter &other) { // swaps the pointed-to key and value elements
+      using std::swap;
+      swap(*key_, *other.key_);
+      swap(*value_, *other.value_);
+    }
+
+  private:
+    friend class JointProxy<KeyIter, ValueIter>; // JointProxy reads and writes through key_ and value_ directly
+    KeyIter key_;
+    ValueIter value_;
+};
+
+template <class KeyIter, class ValueIter> class JointProxy { // reference-like proxy for one (key, value) pair
+  private:
+    typedef JointIter<KeyIter, ValueIter> InnerIterator;
+
+  public:
+    typedef struct { // owning copy of one key/value pair
+      typename std::iterator_traits<KeyIter>::value_type key;
+      typename std::iterator_traits<ValueIter>::value_type value;
+      const typename std::iterator_traits<KeyIter>::value_type &GetKey() const { return key; }
+    } value_type;
+
+    JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) : inner_(key_iter, value_iter) {}
+    JointProxy(const JointProxy<KeyIter, ValueIter> &other) : inner_(other.inner_) {} // copies the iterators, not the elements
+
+    operator value_type() const { // copies the referenced key and value out into a value_type
+      value_type ret;
+      ret.key = *inner_.key_;
+      ret.value = *inner_.value_;
+      return ret;
+    }
+
+    JointProxy &operator=(const JointProxy &other) { // assigns through to the referenced elements
+      *inner_.key_ = *other.inner_.key_;
+      *inner_.value_ = *other.inner_.value_;
+      return *this;
+    }
+
+    JointProxy &operator=(const value_type &other) { // writes a saved pair back into the referenced elements
+      *inner_.key_ = other.key;
+      *inner_.value_ = other.value;
+      return *this;
+    }
+
+    typename std::iterator_traits<KeyIter>::reference GetKey() const {
+      return *(inner_.key_);
+    }
+
+    friend void swap(JointProxy<KeyIter, ValueIter> first, JointProxy<KeyIter, ValueIter> second) { // proxies taken by value; the underlying elements are what get swapped
+      first.Inner().DeepSwap(second.Inner());
+    }
+
+  private:
+    friend class ProxyIterator<JointProxy<KeyIter, ValueIter> >;
+
+    InnerIterator &Inner() { return inner_; }
+    const InnerIterator &Inner() const { return inner_; }
+    InnerIterator inner_;
+};
+
+// Adapts a comparator on keys so it can compare any mix of Proxy objects and
+// Proxy::value_type copies; both expose GetKey(), which is all that is used.
+template <class Proxy, class Less> class LessWrapper {
+  public:
+    // std::binary_function was removed in C++17, so its typedefs are spelled out instead of inherited.
+    typedef const typename Proxy::value_type &first_argument_type;
+    typedef const typename Proxy::value_type &second_argument_type;
+    typedef bool result_type;
+
+    explicit LessWrapper(const Less &less) : less_(less) {}
+
+    // One overload per proxy/value pairing; each returns less_(left key, right key).
+    bool operator()(const Proxy &left, const Proxy &right) const { return less_(left.GetKey(), right.GetKey()); }
+    bool operator()(const Proxy &left, const typename Proxy::value_type &right) const { return less_(left.GetKey(), right.GetKey()); }
+    bool operator()(const typename Proxy::value_type &left, const Proxy &right) const { return less_(left.GetKey(), right.GetKey()); }
+    bool operator()(const typename Proxy::value_type &left, const typename Proxy::value_type &right) const { return less_(left.GetKey(), right.GetKey()); }
+
+  private:
+    const Less less_;
+};
+
+} // namespace detail
+
+template <class KeyIter, class ValueIter> class PairedIterator : public ProxyIterator<detail::JointProxy<KeyIter, ValueIter> > { // ProxyIterator over a JointProxy, built directly from a key/value iterator pair
+  public:
+    PairedIterator(const KeyIter &key, const ValueIter &value) :
+      ProxyIterator<detail::JointProxy<KeyIter, ValueIter> >(detail::JointProxy<KeyIter, ValueIter>(key, value)) {}
+};
+
+template <class KeyIter, class ValueIter, class Less> void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const ValueIter &value_begin, const Less &less) { // sorts the keys with less and applies the same permutation to the values
+  ProxyIterator<detail::JointProxy<KeyIter, ValueIter> > full_begin(detail::JointProxy<KeyIter, ValueIter>(key_begin, value_begin));
+  detail::LessWrapper<detail::JointProxy<KeyIter, ValueIter>, Less> less_wrap(less); // compares by key only
+  std::sort(full_begin, full_begin + (key_end - key_begin), less_wrap); // the value range is taken to be as long as the key range
+}
+
+
+template <class KeyIter, class ValueIter> void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const ValueIter &value_begin) { // overload that orders keys with std::less
+  JointSort(key_begin, key_end, value_begin, std::less<typename std::iterator_traits<KeyIter>::value_type>());
+}
+
+} // namespace util
+
+#endif // UTIL_JOINT_SORT_H
diff --git a/mosesdecoder/util/mmap.hh b/mosesdecoder/util/mmap.hh
new file mode 100644
index 0000000000000000000000000000000000000000..b474dc75ba619d94bfa0edd5d5bfab47d0dfb271
--- /dev/null
+++ b/mosesdecoder/util/mmap.hh
@@ -0,0 +1,225 @@
+#ifndef UTIL_MMAP_H
+#define UTIL_MMAP_H
+// Utilities for mmaped files.
+
+#include <cstddef>
+#include <limits>
+
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace util {
+
+class scoped_fd;
+
+std::size_t SizePage();
+
+// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
+class scoped_mmap {
+  public:
+    scoped_mmap() : data_((void*)-1), size_(0) {} // starts out empty
+    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
+    ~scoped_mmap(); // defined out of line
+
+    void *get() const { return data_; }
+
+    const uint8_t *begin() const { return reinterpret_cast<uint8_t*>(data_); }
+    const uint8_t *end() const { return reinterpret_cast<uint8_t*>(data_) + size_; }
+    std::size_t size() const { return size_; }
+
+    void reset(void *data, std::size_t size) {
+      scoped_mmap other(data_, size_); // hand the old mapping to a temporary; its destructor runs when reset returns
+      data_ = data;
+      size_ = size;
+    }
+
+    void reset() {
+      reset((void*)-1, 0); // back to the empty state
+    }
+
+    void *steal() { // returns the pointer and leaves this object empty; the size is not returned
+      void *ret = data_;
+      data_ = (void*)-1;
+      size_ = 0;
+      return ret;
+    }
+
+  private:
+    void *data_;
+    std::size_t size_;
+
+    scoped_mmap(const scoped_mmap &); // declared private: not copyable
+    scoped_mmap &operator=(const scoped_mmap &);
+};
+
+/* For when the memory might come from mmap, new char[], or malloc.  Uses NULL
+ * and 0 for blanks even though mmap signals errors with (void*)-1.  The reset
+ * function checks for that blank in the mmap case.
+ */
+class scoped_memory {
+  public:
+    typedef enum {
+      MMAP_ROUND_UP_ALLOCATED, // The size was rounded up to a multiple of page size.  Do the same before munmap.
+      MMAP_ALLOCATED, // munmap
+      MALLOC_ALLOCATED, // free
+      NONE_ALLOCATED // nothing here!
+    } Alloc;
+
+    scoped_memory(void *data, std::size_t size, Alloc source)
+      : data_(data), size_(size), source_(source) {}
+
+    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {} // starts out empty
+
+    // Calls HugeMalloc
+    scoped_memory(std::size_t to, bool zero_new);
+
+    ~scoped_memory() { reset(); }
+
+    void *get() const { return data_; }
+    const char *begin() const { return reinterpret_cast<char*>(data_); }
+    const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
+    std::size_t size() const { return size_; }
+
+    Alloc source() const { return source_; }
+
+    void reset() { reset(NULL, 0, NONE_ALLOCATED); } // replace the current contents with the blank state
+
+    void reset(void *data, std::size_t size, Alloc from); // defined out of line
+
+    void *steal() { // returns the pointer and leaves this object blank; size and source are not returned
+      void *ret = data_;
+      data_ = NULL;
+      size_ = 0;
+      source_ = NONE_ALLOCATED;
+      return ret;
+    }
+
+  private:
+    void *data_;
+    std::size_t size_;
+
+    Alloc source_;
+
+    scoped_memory(const scoped_memory &); // declared private: not copyable
+    scoped_memory &operator=(const scoped_memory &);
+};
+
+extern const int kFileFlags;
+
+// Cross-platform, error-checking wrapper for mmap().
+void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);
+
+// msync wrapper
+void SyncOrThrow(void *start, size_t length);
+
+// Cross-platform, error-checking wrapper for munmap().
+void UnmapOrThrow(void *start, size_t length);
+
+// Allocate memory, promising that all/vast majority of it will be used.  Tries
+// hard to use huge pages on Linux.
+// If you want zeroed memory, pass zeroed = true.
+void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);
+
+// Reallocates memory ala realloc but with option to zero the new memory.
+// On Linux, the memory can come from anonymous mmap or malloc/calloc.
+// On non-Linux, only malloc/calloc is supported.
+//
+// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
+// this.
+void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);
+
+typedef enum { // how MapRead should bring file contents into memory
+  // mmap with no prepopulate
+  LAZY,
+  // On Linux, pass MAP_POPULATE to mmap.
+  POPULATE_OR_LAZY,
+  // Populate on Linux.  malloc and read on non-Linux.
+  POPULATE_OR_READ,
+  // malloc and read.
+  READ,
+  // malloc and read in parallel (recommended for Lustre)
+  PARALLEL_READ,
+} LoadMethod;
+
+void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);
+
+// Open file name with mmap of size bytes, all of which are initially zero.
+void *MapZeroedWrite(int fd, std::size_t size);
+void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
+
+// Forward rolling memory map with no overlap.
+class Rolling {
+  public:
+    Rolling() {} // NOTE(review): leaves the pointer and integer members uninitialized until Init() or an assignment
+
+    explicit Rolling(void *data) { Init(data); }
+
+    Rolling(const Rolling &copy_from, uint64_t increase = 0);
+    Rolling &operator=(const Rolling &copy_from);
+
+    // For an actual rolling mmap.
+    explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);
+
+    // For a static mapping
+    void Init(void *data) {
+      ptr_ = data;
+      current_end_ = std::numeric_limits<uint64_t>::max(); // window [0, max): CheckedBase only calls Roll for index == max
+      current_begin_ = 0;
+      // Mark as a pass-through.
+      fd_ = -1;
+    }
+
+    void IncreaseBase(uint64_t by) {
+      file_begin_ += by;
+      ptr_ = static_cast<uint8_t*>(ptr_) + by;
+      if (!IsPassthrough()) current_end_ = 0; // an empty window makes the next CheckedBase call Roll
+    }
+
+    void DecreaseBase(uint64_t by) {
+      file_begin_ -= by;
+      ptr_ = static_cast<uint8_t*>(ptr_) - by;
+      if (!IsPassthrough()) current_end_ = 0; // an empty window makes the next CheckedBase call Roll
+    }
+
+    void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);
+
+    // Returns base pointer
+    void *get() const { return ptr_; }
+
+    // Returns base pointer.
+    void *CheckedBase(uint64_t index) {
+      if (index >= current_end_ || index < current_begin_) { // index lies outside [current_begin_, current_end_)
+        Roll(index);
+      }
+      return ptr_;
+    }
+
+    // Returns indexed pointer.
+    void *CheckedIndex(uint64_t index) {
+      return static_cast<uint8_t*>(CheckedBase(index)) + index; // base pointer plus index bytes
+    }
+
+  private:
+    void Roll(uint64_t index);
+
+    // True if this is just a thin wrapper on a pointer.
+    bool IsPassthrough() const { return fd_ == -1; }
+
+    void *ptr_;
+    uint64_t current_begin_;
+    uint64_t current_end_;
+
+    scoped_memory mem_;
+
+    int fd_;
+    uint64_t file_begin_;
+    uint64_t file_end_;
+
+    bool for_write_;
+    std::size_t block_;
+    std::size_t read_bound_;
+};
+
+} // namespace util
+
+#endif // UTIL_MMAP_H
diff --git a/mosesdecoder/util/multi_intersection.hh b/mosesdecoder/util/multi_intersection.hh
new file mode 100644
index 0000000000000000000000000000000000000000..73954608e6e8118e7dd4679e437d695599f7e9df
--- /dev/null
+++ b/mosesdecoder/util/multi_intersection.hh
@@ -0,0 +1,80 @@
+#ifndef UTIL_MULTI_INTERSECTION_H
+#define UTIL_MULTI_INTERSECTION_H
+
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+namespace detail {
+/* Orders ranges by their number of elements (smaller range first).
+ *
+ * The argument/result typedefs used to come from std::binary_function, which
+ * was deprecated in C++11 and removed in C++17.  They are spelled out here so
+ * the functor keeps the same nested types while building under newer
+ * standards.
+ */
+template <class Range> struct RangeLessBySize {
+  typedef const Range &first_argument_type;
+  typedef const Range &second_argument_type;
+  typedef bool result_type;
+
+  // True when left holds strictly fewer elements than right.
+  bool operator()(const Range &left, const Range &right) const {
+    return left.size() < right.size();
+  }
+};
+
+/* Takes sets specified by their iterators and a boost::optional containing
+ * the lowest intersection if any.  Each set must be sorted in increasing
+ * order.  sets is changed to truncate the beginning of each sequence to the
+ * location of the match or an empty set.  Precondition: sets is not empty
+ * since the intersection over null is the universe and this function does not
+ * know the universe.
+ */
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
+  typedef std::vector<boost::iterator_range<Iterator> > Sets;
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+
+  assert(!sets.empty());
+
+  // An empty first set means there is nothing to intersect with.
+  if (sets.front().empty()) return boost::optional<Value>();
+  // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
+  // highest is the current candidate: the largest front element seen so far.
+  Value highest(sets.front().front());
+  for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
+    // Drop everything in this set that compares less than the candidate.
+    i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
+    // A set that runs out cannot contain the candidate or anything larger.
+    if (i->empty()) return boost::optional<Value>();
+    if (less(highest, i->front())) {
+      // This set skipped past the candidate, so its front becomes the new
+      // candidate and every set has to be re-checked against it.
+      highest = i->front();
+      // start over
+      i = sets.begin();
+    } else {
+      // Front is neither less nor greater than the candidate: this set agrees.
+      ++i;
+    }
+  }
+  // The loop only finishes once every set's front is equivalent to highest.
+  return boost::optional<Value>(highest);
+}
+
+} // namespace detail
+
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
+  typedef boost::iterator_range<Iterator> Range;
+  assert(!sets.empty());
+
+  // Reorder the caller's vector so the ranges with the fewest elements come
+  // first, then let the sorted-input routine do the search.
+  const detail::RangeLessBySize<Range> fewest_first = detail::RangeLessBySize<Range>();
+  std::sort(sets.begin(), sets.end(), fewest_first);
+  return detail::FirstIntersectionSorted(sets, less);
+}
+
+template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
+  // Convenience overload: compare elements with std::less on the value type.
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  return FirstIntersection(sets, std::less<Value>());
+}
+
+/* Calls out(value) for every value present in all of the sets, in increasing
+ * order.  sets is modified: it is sorted by range size and the ranges are
+ * advanced as matches are consumed.  Precondition: sets is not empty.
+ */
+template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  // After a match every range starts at the matched value, so stepping the
+  // first range forward by one element moves the search past that value.
+  // The loop ends when FirstIntersectionSorted returns an empty optional.
+  for (boost::optional<Value> ret; (ret = detail::FirstIntersectionSorted(sets, less)); sets.front().advance_begin(1)) {
+    out(*ret);
+  }
+}
+
+template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
+  // Convenience overload: compare elements with std::less on the value type.
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  AllIntersection(sets, out, std::less<Value>());
+}
+
+} // namespace util
+
+#endif // UTIL_MULTI_INTERSECTION_H
diff --git a/mosesdecoder/util/murmur_hash.hh b/mosesdecoder/util/murmur_hash.hh
new file mode 100644
index 0000000000000000000000000000000000000000..f17157cd926ab763cd388758a907e684f971ed0e
--- /dev/null
+++ b/mosesdecoder/util/murmur_hash.hh
@@ -0,0 +1,18 @@
+#ifndef UTIL_MURMUR_HASH_H
+#define UTIL_MURMUR_HASH_H
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+
+// 64-bit machine version
+uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0);
+// 32-bit machine version (not the same function as above)
+uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0);
+// Use the version for this arch.  Because the values differ across
+// architectures, really only use it for in-memory structures.
+uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0);
+
+} // namespace util
+
+#endif // UTIL_MURMUR_HASH_H
diff --git a/mosesdecoder/util/parallel_read.hh b/mosesdecoder/util/parallel_read.hh
new file mode 100644
index 0000000000000000000000000000000000000000..1e96e79035a93a4a669a9d7d7bd14b146e0cb96a
--- /dev/null
+++ b/mosesdecoder/util/parallel_read.hh
@@ -0,0 +1,16 @@
+#ifndef UTIL_PARALLEL_READ__
+#define UTIL_PARALLEL_READ__
+
+/* Read pieces of a file in parallel.  This has a very specific use case:
+ * reading files from Lustre is CPU bound, so using multiple threads actually
+ * increases throughput.  Speed matters when an LM takes a terabyte.
+ */
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+void ParallelRead(int fd, void *to, std::size_t amount, uint64_t offset);
+} // namespace util
+
+#endif // UTIL_PARALLEL_READ__
diff --git a/mosesdecoder/util/pcqueue.hh b/mosesdecoder/util/pcqueue.hh
new file mode 100644
index 0000000000000000000000000000000000000000..05c868fba5ccf6c5f5111b1d5fbd4922d88a82c7
--- /dev/null
+++ b/mosesdecoder/util/pcqueue.hh
@@ -0,0 +1,156 @@
+#ifndef UTIL_PCQUEUE_H
+#define UTIL_PCQUEUE_H
+
+#include "util/exception.hh"
+
+#include <boost/interprocess/sync/interprocess_semaphore.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/utility.hpp>
+
+#include <cerrno>
+
+#ifdef __APPLE__
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/mach_traps.h>
+#include <mach/mach.h>
+#endif // __APPLE__
+
+namespace util {
+
+/* OS X Maverick and Boost interprocess were doing "Function not implemented."
+ * So this is my own wrapper around the mach kernel APIs.
+ */
+#ifdef __APPLE__
+
+#define MACH_CALL(call) UTIL_THROW_IF(KERN_SUCCESS != (call), Exception, "Mach call failure")
+
+/* Counting semaphore backed by a Mach kernel semaphore.
+ *
+ * The constructor, wait() and post() throw util::Exception (via MACH_CALL)
+ * when the underlying Mach call does not return KERN_SUCCESS.  The destructor
+ * never throws.
+ */
+class Semaphore {
+  public:
+    // Creates a FIFO-policy semaphore with the given initial count.
+    explicit Semaphore(int value) : task_(mach_task_self()) {
+      MACH_CALL(semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value));
+    }
+
+    ~Semaphore() {
+      // Deliberately not MACH_CALL: that macro throws, and an exception
+      // leaving a destructor terminates the program (always under C++11's
+      // implicit noexcept, and during stack unwinding in any standard).
+      // Teardown is best effort, so the status is discarded.
+      (void)semaphore_destroy(task_, back_);
+    }
+
+    // Blocks until the semaphore can be decremented.
+    void wait() {
+      MACH_CALL(semaphore_wait(back_));
+    }
+
+    // Increments the semaphore, releasing one waiter if any.
+    void post() {
+      MACH_CALL(semaphore_signal(back_));
+    }
+
+  private:
+    semaphore_t back_;
+    task_t task_;
+};
+
+// Apple flavour of WaitSemaphore: forwards straight to Semaphore::wait(),
+// which throws through MACH_CALL if the Mach call fails.
+inline void WaitSemaphore(Semaphore &semaphore) {
+  semaphore.wait();
+}
+
+#else
+typedef boost::interprocess::interprocess_semaphore Semaphore;
+
+// Waits on the semaphore, retrying for as long as the wait fails with EINTR.
+// Any other interprocess_exception is rethrown to the caller.
+inline void WaitSemaphore (Semaphore &on) {
+  for (;;) {
+    try {
+      on.wait();
+      return;
+    } catch (boost::interprocess::interprocess_exception &e) {
+      const bool interrupted = (e.get_native_error() == EINTR);
+      if (!interrupted) throw;
+      // Interrupted: loop around and wait again.
+    }
+  }
+}
+
+#endif // __APPLE__
+
+/**
+ * Producer consumer queue safe for multiple producers and multiple consumers.
+ * T must be default constructible and have operator=.
+ * The value is copied twice for Consume(T &out) or three times for Consume(),
+ * so larger objects should be passed via pointer.
+ * Strong exception guarantee if operator= throws.  Undefined if semaphores throw.
+ *
+ * Storage is a fixed-size circular array of T.  Two semaphores count the free
+ * and the occupied slots; two independent mutexes guard the write cursor and
+ * the read cursor.
+ */
+template <class T> class PCQueue : boost::noncopyable {
+ public:
+  // Allocates `size` default-constructed slots; all of them start out empty.
+  explicit PCQueue(size_t size)
+   : empty_(size), used_(0),
+     storage_(new T[size]),
+     end_(storage_.get() + size),
+     produce_at_(storage_.get()),
+     consume_at_(storage_.get()) {}
+
+  // Add a value to the queue.
+  // Blocks while no slot is free.
+  void Produce(const T &val) {
+    // Reserve a free slot before touching the write cursor.
+    WaitSemaphore(empty_);
+    {
+      boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
+      try {
+        *produce_at_ = val;
+      }
+      catch (...) {
+        // The copy failed, so hand the reserved slot back before rethrowing.
+        empty_.post();
+        throw;
+      }
+      // Advance the write cursor, wrapping at the end of storage_.
+      if (++produce_at_ == end_) produce_at_ = storage_.get();
+    }
+    // Posted after the lock is released: one more slot is ready to consume.
+    used_.post();
+  }
+
+  // Consume a value, assigning it to out.
+  // Blocks while the queue holds no value.  Returns out.
+  T& Consume(T &out) {
+    // Claim an occupied slot before touching the read cursor.
+    WaitSemaphore(used_);
+    {
+      boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
+      try {
+        out = *consume_at_;
+      }
+      catch (...) {
+        // The copy failed, so the value stays queued: give the claim back.
+        used_.post();
+        throw;
+      }
+      // Advance the read cursor, wrapping at the end of storage_.
+      if (++consume_at_ == end_) consume_at_ = storage_.get();
+    }
+    // Posted after the lock is released: one more slot is free to produce into.
+    empty_.post();
+    return out;
+  }
+
+  // Convenience version of Consume that copies the value to return.
+  // The other version is faster.
+  T Consume() {
+    T ret;
+    Consume(ret);
+    return ret;
+  }
+
+ private:
+  // Number of empty spaces in storage_.
+  Semaphore empty_;
+  // Number of occupied spaces in storage_.
+  Semaphore used_;
+
+  boost::scoped_array<T> storage_;
+
+  // One past the last slot of storage_.
+  T *const end_;
+
+  // Pointer to the slot for the next write in storage_.
+  T *produce_at_;
+  boost::mutex produce_at_mutex_;
+
+  // Pointer to the slot for the next read from storage_.
+  T *consume_at_;
+  boost::mutex consume_at_mutex_;
+
+};
+
+} // namespace util
+
+#endif // UTIL_PCQUEUE_H
diff --git a/mosesdecoder/util/probing_hash_table_test.cc b/mosesdecoder/util/probing_hash_table_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6ed5414318fa5d3fff90d967e82ba7af07e9ec6f
--- /dev/null
+++ b/mosesdecoder/util/probing_hash_table_test.cc
@@ -0,0 +1,102 @@
+#include "util/probing_hash_table.hh"
+
+#include "util/murmur_hash.hh"
+#include "util/scoped.hh"
+
+#define BOOST_TEST_MODULE ProbingHashTableTest
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/functional/hash.hpp>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <stdint.h>
+
+namespace util {
+namespace {
+
+// Table entry for the simple test: a one-byte key with a 64-bit payload.
+struct Entry {
+  typedef unsigned char Key;
+
+  unsigned char key;
+  uint64_t value;
+
+  unsigned char GetKey() const { return key; }
+
+  void SetKey(unsigned char to) { key = to; }
+
+  uint64_t GetValue() const { return value; }
+};
+
+typedef ProbingHashTable<Entry, boost::hash<unsigned char> > Table;
+
+// Insert a single entry and check that only its key can be found.
+BOOST_AUTO_TEST_CASE(simple) {
+  const size_t bytes = Table::Size(10, 1.2);
+  boost::scoped_array<char> backing(new char[bytes]);
+  memset(backing.get(), 0, bytes);
+
+  Table table(backing.get(), bytes);
+  const Entry *found = NULL;
+  // Nothing has been inserted yet.
+  BOOST_CHECK(!table.Find(2, found));
+
+  Entry inserted;
+  inserted.key = 3;
+  inserted.value = 328920;
+  table.Insert(inserted);
+
+  BOOST_REQUIRE(table.Find(3, found));
+  BOOST_CHECK_EQUAL(3, found->GetKey());
+  BOOST_CHECK_EQUAL(static_cast<uint64_t>(328920), found->GetValue());
+  // A key that was never inserted is still absent.
+  BOOST_CHECK(!table.Find(2, found));
+}
+
+// Key-only table entry with a 64-bit key, used by the Double test.
+struct Entry64 {
+  typedef uint64_t Key;
+
+  uint64_t key;
+
+  // Leaves key uninitialized.
+  Entry64() {}
+
+  explicit Entry64(uint64_t key_in) {
+    SetKey(key_in);
+  }
+
+  Key GetKey() const {
+    return key;
+  }
+
+  void SetKey(uint64_t to) {
+    key = to;
+  }
+};
+
+// Hash functor for Table64: MurmurHash64A over the bytes of the 64-bit key.
+struct MurmurHashEntry64 {
+  std::size_t operator()(uint64_t value) const {
+    const void *const bytes = &value;
+    // sizeof(uint64_t) is 8, the length of the key in bytes.
+    return util::MurmurHash64A(bytes, sizeof(value));
+  }
+};
+
+typedef ProbingHashTable<Entry64, MurmurHashEntry64> Table64;
+
+// Grows a table twice through DoubleTo()/Double() and checks its consistency
+// along the way, for a range of initial sizes.
+BOOST_AUTO_TEST_CASE(Double) {
+  for (std::size_t initial = 19; initial < 30; ++initial) {
+    size_t size = Table64::Size(initial, 1.2);
+    scoped_malloc mem(MallocOrThrow(size));
+    // NOTE(review): the third argument is presumably the key value that marks
+    // an unused slot -- confirm against ProbingHashTable's constructor.
+    Table64 table(mem.get(), size, std::numeric_limits<uint64_t>::max());
+    table.Clear();
+    for (uint64_t i = 0; i < 19; ++i) {
+      table.Insert(Entry64(i));
+    }
+    table.CheckConsistency();
+    // Grow the allocation to the size the table asks for, then let the table
+    // move onto the (possibly relocated) memory.
+    mem.call_realloc(table.DoubleTo());
+    table.Double(mem.get());
+    table.CheckConsistency();
+    // NOTE(review): this range starts at 20, so key 19 is never inserted --
+    // confirm whether that gap is intentional.
+    for (uint64_t i = 20; i < 40 ; ++i) {
+      table.Insert(Entry64(i));
+    }
+    mem.call_realloc(table.DoubleTo());
+    table.Double(mem.get());
+    table.CheckConsistency();
+  }
+}
+
+} // namespace
+} // namespace util
diff --git a/mosesdecoder/util/random.hh b/mosesdecoder/util/random.hh
new file mode 100644
index 0000000000000000000000000000000000000000..6c2773520a6d7c0454dd7cbc2695316effa5f215
--- /dev/null
+++ b/mosesdecoder/util/random.hh
@@ -0,0 +1,229 @@
+#ifndef UTIL_RANDOM_H
+#define UTIL_RANDOM_H
+
+#include <cstdlib>
+#include <limits>
+
+namespace util
+{
+/** Thread-safe, cross-platform random number generator.
+ *
+ * This is not for proper security-grade randomness, but should be "good
+ * enough" for producing arbitrary values of various numeric types.
+ *
+ * Before starting, call rand_init() to seed the randomizer.  There is no need
+ * to do this more than once; in fact doing it more often is likely to make the
+ * randomizer less effective.  Once that is done, call the rand(), rand_excl(),
+ * and rand_incl() functions as needed to generate pseudo-random numbers.
+ *
+ * Probability distribution is roughly uniform, but for integral types is
+ * skewed slightly towards lower numbers depending on how close "top" comes to
+ * RAND_MAX.
+ *
+ * For floating-point types, resolution is limited; there will actually be
+ * only RAND_MAX different possible values.
+ */
+
+/** Initialize randomizer with a fixed seed.
+ *
+ * After this, unless the randomizer gets seeded again, consecutive calls to
+ * the random functions will return a sequence of pseudo-random numbers
+ * determined by the seed.  Every time the randomizer is seeded with this same
+ * seed, it will again start returning the same sequence of numbers.
+ */
+void rand_init(unsigned int);
+
+/** Initialize randomizer based on current time.
+ *
+ * Call this to make the randomizer return hard-to-predict numbers.  It won't
+ * produce high-grade randomness, but enough to make the program act
+ * differently on different runs.
+ *
+ * The seed will be based on the current time in seconds.  So calling it twice
+ * within the same second will just reset the randomizer to where it was before.
+ * Don't do that.
+ */
+void rand_init();
+
+
+/** Return a pseudorandom number between 0 and RAND_MAX inclusive.
+ *
+ * Initialize (seed) the randomizer before starting to call this.
+ */
+template<typename T> inline T rand();
+
+
+/** Return a pseudorandom number in the half-open interval [bottom, top).
+ *
+ * Generates a value between "bottom" (inclusive) and "top" (exclusive),
+ * assuming that (top - bottom) <= RAND_MAX.
+ */
+template<typename T> inline T rand_excl(T bottom, T top);
+
+
+/** Return a pseudorandom number in the half-open interval [0, top).
+ *
+ * Generates a value between 0 (inclusive) and "top" (exclusive), assuming that
+ * top <= RAND_MAX.
+ */
+template<typename T> inline T rand_excl(T top);
+
+
+/** Return a pseudorandom number in the closed interval [bottom, top].
+ *
+ * Generates a value between "bottom" and "top" inclusive, assuming that
+ * (top - bottom) < RAND_MAX.
+ */
+template<typename T> inline T rand_incl(T bottom, T top);
+
+
+/** Return a pseudorandom number in the closed interval [0, top].
+ *
+ * Generates a value between 0 and "top" inclusive, assuming that
+ * top < RAND_MAX.
+ */
+template<typename T> inline T rand_incl(T top);
+
+
+/** Return a pseudorandom number which may be larger than RAND_MAX.
+ *
+ * The requested type must be integral, and its size must be an even multiple
+ * of the size of an int.  The return value will combine one or more random
+ * ints into a single value, which could get quite large.
+ *
+ * The result is nonnegative.  Because the constituent ints are also
+ * nonnegative, the most significant bit in each of the ints will be zero,
+ * so for a wider type, there will be "gaps" in the range of possible outputs.
+ */
+template<typename T> inline T wide_rand();
+
+/** Return a pseudorandom number in [0, top), not limited to RAND_MAX.
+ *
+ * Works like wide_rand(), but if the requested type is wider than an int, it
+ * accommodates larger top values than an int can represent.
+ */
+template<typename T> inline T wide_rand_excl(T top);
+
+/** Return a pseudorandom number in [bottom, top), not limited to RAND_MAX.
+ *
+ * Works like wide_rand(), but if the requested type is wider than an int, it
+ * accommodates larger value ranges than an int can represent.
+ */
+template<typename T> inline T wide_rand_excl(T bottom, T top);
+
+/** Return a pseudorandom number in [0, top], not limited to RAND_MAX.
+ *
+ * Works like wide_rand(), but if the requested type is wider than an int, it
+ * accommodates larger top values than an int can represent.
+ */
+template<typename T> inline T wide_rand_incl(T top);
+
+/** Return a pseudorandom number in [bottom, top], not limited to RAND_MAX.
+ *
+ * Works like wide_rand(), but if the requested type is wider than an int, it
+ * accommodates larger value ranges than an int can represent.
+ */
+template<typename T> inline T wide_rand_incl(T bottom, T top);
+
+
+/// Implementation detail.  For the random module's internal use only.
+namespace internal
+{
+/// The central call to the randomizer upon which this whole module is built.
+int rand_int();
+
+/// Helper template: customize random values to required ranges.
+///
+/// rnd_excl maps a raw random value into [0, range); rnd_incl maps it into
+/// [0, range].  The second template argument selects the implementation.
+template<typename T, bool is_integer_type> struct random_scaler;
+
+/// Specialized random_scaler for integral types.  Reduces with a modulus.
+template<typename T> struct random_scaler<T, true>
+{
+  /// Map value into [0, range).  range must not be zero.
+  static T rnd_excl(T value, T range) { return value % range; }
+
+  /// Map value into [0, range].
+  static T rnd_incl(T value, T range)
+  {
+    // range + 1 overflows when range is the largest T: it wraps to zero for
+    // unsigned types (modulo by zero) and is undefined for signed ones.  In
+    // that case the interval already spans every nonnegative T, so the value
+    // needs no reduction.
+    if (range == std::numeric_limits<T>::max()) return value;
+    return value % (range + 1);
+  }
+};
+
+/// Specialized random_scaler for non-integral types.  Rescales linearly,
+/// treating value as a number in [0, RAND_MAX].
+template<typename T> struct random_scaler<T, false>
+{
+  /// Map value into [0, range).
+  static T rnd_excl(T value, T range)
+  {
+    // Promote RAND_MAX to T before adding one to avoid overflow.
+    return range * value / (T(RAND_MAX) + 1);
+  }
+  /// Map value into [0, range]; value == RAND_MAX yields range itself.
+  static T rnd_incl(T value, T range) { return range * value / RAND_MAX; }
+};
+
+/// Helper for filling a wider variable with random ints.
+///
+/// remaining_ints is the number of int-sized slots still to fill.  Each level
+/// of the recursion places one random int in the highest unfilled slot.
+template<typename T, size_t remaining_ints> struct wide_random_collector
+{
+  static T generate()
+  {
+    // The slot for this level sits (remaining_ints - 1) ints above the bottom.
+    // Shifting by a single int width regardless of level would stack every
+    // upper int onto the same bits once T is three or more ints wide.
+    T one_int = util::rand<T>() << (8 * sizeof(int) * (remaining_ints - 1));
+    return one_int | wide_random_collector<T, remaining_ints-1>::generate();
+  }
+};
+/// Specialized wide_random_collector for generating just a single int.
+/// Ends the recursion: the lowest slot is filled without any shift.
+template<typename T> struct wide_random_collector<T, 1>
+{
+  static T generate() { return util::rand<T>(); }
+};
+
+} // namespace internal
+
+
+template<typename T> inline T rand()
+{
+  // Convert the randomizer's int result to the requested type.
+  const int raw = util::internal::rand_int();
+  return T(raw);
+}
+
+template<typename T> inline T rand_excl(T top)
+{
+  // numeric_limits<T>::is_integer picks the scaler at compile time: a modulus
+  // for integral T, a linear rescale otherwise.
+  const T raw = util::rand<T>();
+  return internal::random_scaler<T, std::numeric_limits<T>::is_integer>::rnd_excl(raw, top);
+}
+
+template<typename T> inline T rand_excl(T bottom, T top)
+{
+  // Draw from [0, top - bottom) and shift the result up by bottom.  The width
+  // is passed as an expression on purpose: its (possibly promoted) type is
+  // what the single-argument overload is instantiated for.
+  return rand_excl(top - bottom) + bottom;
+}
+
+template<typename T> inline T rand_incl(T top)
+{
+  // numeric_limits<T>::is_integer picks the scaler at compile time: a modulus
+  // for integral T, a linear rescale otherwise.
+  const T raw = util::rand<T>();
+  return internal::random_scaler<T, std::numeric_limits<T>::is_integer>::rnd_incl(raw, top);
+}
+
+template<typename T> inline T rand_incl(T bottom, T top)
+{
+  // Draw from [0, top - bottom] and shift the result up by bottom.  The width
+  // is passed as an expression on purpose: its (possibly promoted) type is
+  // what the single-argument overload is instantiated for.
+  return rand_incl(top - bottom) + bottom;
+}
+
+template<typename T> inline T wide_rand()
+{
+  // One random int per int-sized slot of T.
+  typedef internal::wide_random_collector<T, sizeof(T)/sizeof(int)> collector;
+  return collector::generate();
+}
+
+template<typename T> inline T wide_rand_excl(T top)
+{
+  // Same scaling as rand_excl(), but fed from wide_rand() so the raw value is
+  // not limited to a single int.
+  const T raw = util::wide_rand<T>();
+  return internal::random_scaler<T, std::numeric_limits<T>::is_integer>::rnd_excl(raw, top);
+}
+
+template<typename T> inline T wide_rand_excl(T bottom, T top)
+{
+  // Draw from [0, top - bottom) and shift the result up by bottom.  The width
+  // is passed as an expression on purpose: its (possibly promoted) type is
+  // what the single-argument overload is instantiated for.
+  return wide_rand_excl(top - bottom) + bottom;
+}
+
+template<typename T> inline T wide_rand_incl(T top)
+{
+  // Same scaling as rand_incl(), but fed from wide_rand() so the raw value is
+  // not limited to a single int.
+  const T raw = util::wide_rand<T>();
+  return internal::random_scaler<T, std::numeric_limits<T>::is_integer>::rnd_incl(raw, top);
+}
+
+template<typename T> inline T wide_rand_incl(T bottom, T top)
+{
+  // Draw from [0, top - bottom] and shift the result up by bottom.  The width
+  // is passed as an expression on purpose: its (possibly promoted) type is
+  // what the single-argument overload is instantiated for.
+  return wide_rand_incl(top - bottom) + bottom;
+}
+} // namespace util
+
+#endif
diff --git a/mosesdecoder/util/sorted_uniform_test.cc b/mosesdecoder/util/sorted_uniform_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..39f05e57ecacd50db36686f5a9a10c4e14a620fc
--- /dev/null
+++ b/mosesdecoder/util/sorted_uniform_test.cc
@@ -0,0 +1,127 @@
+#include "util/sorted_uniform.hh"
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int.hpp>
+#include <boost/random/variate_generator.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/unordered_map.hpp>
+
+#define BOOST_TEST_MODULE SortedUniformTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+namespace util {
+namespace {
+
+// Key/value record stored in the sorted array under test.  Ordering looks at
+// the key only, which is what std::sort uses to prepare the array.
+template <class KeyT, class ValueT> struct Entry {
+  typedef KeyT Key;
+  typedef ValueT Value;
+
+  Key key;
+  Value value;
+
+  Key GetKey() const { return key; }
+
+  Value GetValue() const { return value; }
+
+  bool operator<(const Entry<Key,Value> &other) const {
+    return this->key < other.key;
+  }
+};
+
+// Functor handed to SortedUniformFind: extracts the key from an entry pointer.
+template <class KeyT> struct Accessor {
+  typedef KeyT Key;
+  template <class Value> Key operator()(const Entry<Key, Value> *entry) const {
+    const Entry<Key, Value> &record = *entry;
+    return record.GetKey();
+  }
+};
+
+// Looks `key` up with SortedUniformFind in [begin, end) and checks the outcome
+// against `reference`: a miss where the map has no such key, otherwise a hit
+// whose value equals the mapped value.
+template <class Key, class Value> void Check(const Entry<Key, Value> *begin, const Entry<Key, Value> *end, const boost::unordered_map<Key, Value> &reference, const Key key) {
+  typedef const Entry<Key, Value> *It;
+  typedef typename boost::unordered_map<Key, Value>::const_iterator RefIt;
+
+  // g++ can't tell that require will crash and burn, hence the initializer.
+  It found = NULL;
+  const bool hit = SortedUniformFind<It, Accessor<Key>, Pivot64>(Accessor<Key>(), begin, end, key, found);
+
+  const RefIt expected = reference.find(key);
+  if (expected == reference.end()) {
+    BOOST_CHECK(!hit);
+    return;
+  }
+  BOOST_REQUIRE(hit);
+  BOOST_CHECK_EQUAL(expected->second, found->GetValue());
+}
+
+// Searching an empty (NULL, NULL) range must report "not found".
+BOOST_AUTO_TEST_CASE(empty) {
+  typedef const Entry<uint64_t, float> T;
+  const T *const no_entries = NULL;
+  const uint64_t wanted = 10;
+  const T *found;
+  const bool hit = SortedUniformFind<const T*, Accessor<uint64_t>, Pivot64>(Accessor<uint64_t>(), no_entries, no_entries, wanted, found);
+  BOOST_CHECK(!hit);
+}
+
+/* Builds a random sorted array and checks SortedUniformFind against a hash map.
+ *
+ * upper:   largest key the generator may produce (keys are drawn from [0, upper]).
+ * entries: number of insertion attempts; duplicate keys are dropped, so the
+ *          array may end up shorter.
+ * queries: number of random lookups, and the cap on known-present lookups.
+ */
+template <class Key> void RandomTest(Key upper, size_t entries, size_t queries) {
+  typedef unsigned char Value;
+  boost::mt19937 rng;
+  boost::uniform_int<Key> range_key(0, upper);
+  boost::uniform_int<Value> range_value(0, 255);
+  boost::variate_generator<boost::mt19937&, boost::uniform_int<Key> > gen_key(rng, range_key);
+  boost::variate_generator<boost::mt19937&, boost::uniform_int<unsigned char> > gen_value(rng, range_value);
+
+  typedef Entry<Key, Value> Ent;
+  std::vector<Ent> backing;
+  boost::unordered_map<Key, unsigned char> reference;
+  Ent ent;
+  for (size_t i = 0; i < entries; ++i) {
+    Key key = gen_key();
+    unsigned char value = gen_value();
+    // Only keys that are new to the reference map go into the array.
+    if (reference.insert(std::make_pair(key, value)).second) {
+      ent.key = key;
+      ent.value = value;
+      backing.push_back(ent);
+    }
+  }
+  std::sort(backing.begin(), backing.end());
+
+  // Dereferencing end() (or begin() of an empty vector) is undefined, so the
+  // element range is derived from the first element and the size instead.
+  const Ent *const first = backing.empty() ? static_cast<const Ent*>(NULL) : &backing[0];
+  const Ent *const last = first + backing.size();
+
+  // Random queries.
+  for (size_t i = 0; i < queries; ++i) {
+    const Key key = gen_key();
+    Check<Key, unsigned char>(first, last, reference, key);
+  }
+
+  // Queries for keys known to be present.  The key is it->first; it->second
+  // is the stored value and would merely be another arbitrary key.
+  typename boost::unordered_map<Key, unsigned char>::const_iterator it = reference.begin();
+  for (size_t i = 0; (i < queries) && (it != reference.end()); ++i, ++it) {
+    Check<Key, unsigned char>(first, last, reference, it->first);
+  }
+}
+
+// The cases below all run RandomTest<Key>(upper, entries, queries) with
+// different key types, key ranges and entry counts.
+
+// 10 insertion attempts over keys 0..11.
+BOOST_AUTO_TEST_CASE(basic) {
+  RandomTest<uint8_t>(11, 10, 200);
+}
+
+// More insertion attempts (50) than there are distinct keys (0..11).
+BOOST_AUTO_TEST_CASE(tiny_dense_random) {
+  RandomTest<uint8_t>(11, 50, 200);
+}
+
+// 100 insertion attempts over keys 0..100.
+BOOST_AUTO_TEST_CASE(small_dense_random) {
+  RandomTest<uint8_t>(100, 100, 200);
+}
+
+// 15 insertion attempts over keys 0..200.
+BOOST_AUTO_TEST_CASE(small_sparse_random) {
+  RandomTest<uint8_t>(200, 15, 200);
+}
+
+// 16-bit keys: 1000 insertion attempts over keys 0..32000.
+BOOST_AUTO_TEST_CASE(medium_sparse_random) {
+  RandomTest<uint16_t>(32000, 1000, 2000);
+}
+
+// 64-bit keys drawn from the full uint64_t range.
+BOOST_AUTO_TEST_CASE(sparse_random) {
+  RandomTest<uint64_t>(std::numeric_limits<uint64_t>::max(), 100000, 2000);
+}
+
+} // namespace
+} // namespace util
diff --git a/mosesdecoder/util/stream/block.hh b/mosesdecoder/util/stream/block.hh
new file mode 100644
index 0000000000000000000000000000000000000000..42df13f3213522a8e10e0f278c6e90d46f825430
--- /dev/null
+++ b/mosesdecoder/util/stream/block.hh
@@ -0,0 +1,93 @@
+#ifndef UTIL_STREAM_BLOCK_H
+#define UTIL_STREAM_BLOCK_H
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+/**
+ * Encapsulates a block of memory.
+ */
+class Block {
+  public:
+    /** Builds a block with no memory attached; such a block tests false. */
+    Block() : mem_(NULL), valid_size_(0) {}
+
+    /**
+     * Wraps an existing segment of memory.
+     *
+     * @param[in] mem  Start of the segment
+     * @param[in] size Length of the segment in bytes; also the initial valid size
+     */
+    Block(void *mem, std::size_t size) : mem_(mem), valid_size_(size) {}
+
+    /** Mutable access to the memory underlying this block. */
+    void *Get() { return mem_; }
+
+    /** Read-only access to the memory underlying this block. */
+    const void *Get() const { return mem_; }
+
+    /**
+     * Number of bytes, counted from Get(), that should be interpreted as valid.
+     * This can be less than what was allocated, e.g. when a read hits EOF.
+     */
+    std::size_t ValidSize() const { return valid_size_; }
+
+    /**
+     * Overrides the number of bytes that should be interpreted as valid.
+     *
+     * @param[in] to Number of bytes
+     */
+    void SetValidSize(std::size_t to) { valid_size_ = to; }
+
+    /** One past the last valid byte: Get() advanced by ValidSize() bytes. */
+    const void *ValidEnd() const {
+      const uint8_t *const first = static_cast<const uint8_t*>(mem_);
+      return first + valid_size_;
+    }
+
+    /**
+     * True when memory is attached (Get() is not NULL).
+     *
+     * Being an implicit conversion, this lets a bare Block serve as the
+     * condition of an if statement.
+     */
+    operator bool() const { return NULL != mem_; }
+
+    /** True when no memory is attached (Get() is NULL). */
+    bool operator!() const { return !mem_; }
+
+  private:
+    friend class Link;
+    friend class RewindableStream;
+
+    /**
+     * Turns this block into poison, which this class defines as a block whose
+     * memory pointer is NULL.  The valid size is left untouched.
+     */
+    void SetToPoison() { mem_ = NULL; }
+
+    void *mem_;
+    std::size_t valid_size_;
+};
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_BLOCK_H
diff --git a/mosesdecoder/util/stream/chain.hh b/mosesdecoder/util/stream/chain.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2969822601630dd8c3c05924134f9c1c53c2037c
--- /dev/null
+++ b/mosesdecoder/util/stream/chain.hh
@@ -0,0 +1,347 @@
+#ifndef UTIL_STREAM_CHAIN_H
+#define UTIL_STREAM_CHAIN_H
+
+#include "util/stream/block.hh"
+#include "util/stream/config.hh"
+#include "util/stream/multi_progress.hh"
+#include "util/scoped.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/thread/thread.hpp>
+
+#include <cstddef>
+#include <cassert>
+
+namespace util {
+template <class T> class PCQueue;
+namespace stream {
+
+// Exception type derived from util's Exception.  Only declarations live here;
+// the constructor and destructor are defined out of line.
+// NOTE(review): presumably thrown for invalid chain configuration -- confirm
+// at the throw sites.
+class ChainConfigException : public Exception {
+  public:
+    ChainConfigException() throw();
+    ~ChainConfigException() throw();
+};
+
+class Chain;
+class RewindableStream;
+
+/**
+ * Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain".
+ *
+ * Specifies position in chain for Link constructor.
+ */
+class ChainPosition {
+  public:
+    /** The chain this position belongs to. */
+    const Chain &GetChain() const {
+      return *chain_;
+    }
+
+  private:
+    // Construction and member access are reserved for these classes.
+    friend class Chain;
+    friend class Link;
+    friend class RewindableStream;
+
+    // Stores the addresses of both queues and registers a new worker with
+    // `progress` through MultiProgress::Add().
+    ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
+      : in_(&in),
+        out_(&out),
+        chain_(chain),
+        progress_(progress.Add()) {}
+
+    PCQueue<Block> *in_;
+    PCQueue<Block> *out_;
+
+    Chain *chain_;
+
+    WorkerProgress progress_;
+};
+
+
+/**
+ * Encapsulates a worker thread processing data at a given position in the chain.
+ *
+ * Each instance of this class owns one boost thread in which the worker is Run().
+ */
+class Thread {
+  public:
+
+    /**
+     * Constructs a new Thread in which the provided Worker is Run().
+     *
+     * Position is usually ChainPosition but if there are multiple streams involved, this can be ChainPositions.
+     *
+     * After a call to this constructor, the provided worker will be running within a boost thread owned by the newly constructed Thread object.
+     *
+     * The boost thread is handed a reference to this object as its callable,
+     * so it ends up invoking operator() below with position and worker.
+     */
+    template <class Position, class Worker> Thread(const Position &position, const Worker &worker)
+      : thread_(boost::ref(*this), position, worker) {}
+
+    ~Thread();
+
+    /**
+     * Launches the provided worker in this object's boost thread.
+     *
+     * This method is called automatically by this class's @ref Thread() "constructor".
+     *
+     * NOTE(review): the try/catch around Run() is commented out, so an
+     * exception leaving worker.Run() propagates out of this function, and
+     * UnhandledException() is not called from this header.
+     */
+    template <class Position, class Worker> void operator()(const Position &position, Worker &worker) {
+//      try {
+        worker.Run(position);
+//      } catch (const std::exception &e) {
+//        UnhandledException(e);
+//      }
+    }
+
+  private:
+    // Declared here, defined elsewhere; only referenced by the disabled
+    // handler above.
+    void UnhandledException(const std::exception &e);
+
+    boost::thread thread_;
+};
+
+/**
+ * This resets blocks to full valid size.  Used to close the loop in Chain by recycling blocks.
+ */
+class Recycler {
+  public:
+    /**
+     * Resets the blocks in the chain such that the blocks' respective valid sizes match the chain's block size.
+     *
+     * Only declared in this header; the definition lives elsewhere.
+     *
+     * @see Block::SetValidSize()
+     * @see Chain::BlockSize()
+     */
+    void Run(const ChainPosition &position);
+};
+
+extern const Recycler kRecycle;
+class WriteAndRecycle;
+class PWriteAndRecycle;
+
+/**
+ * Represents a sequence of workers, through which @ref Block "blocks" can pass.
+ */
+class Chain {
+  private:
+    template <class T, void (T::*ptr)(const ChainPosition &) = &T::Run> struct CheckForRun {
+      typedef Chain type;
+    };
+
+  public:
+
+    /**
+     * Constructs a configured Chain.
+     *
+     * @param config Specifies how to configure the Chain.
+     */
+    explicit Chain(const ChainConfig &config);
+
+    /**
+     * Destructs a Chain.
+     *
+     * This method waits for the chain's threads to complete,
+     * and frees the memory held by this chain.
+     */
+    ~Chain();
+
+    void ActivateProgress() {
+      assert(!Running());
+      progress_.Activate();
+    }
+
+    void SetProgressTarget(uint64_t target) {
+      progress_.SetTarget(target);
+    }
+
+    /**
+     * Gets the number of bytes in each record of a Block.
+     *
+     * @see ChainConfig::entry_size
+     */
+    std::size_t EntrySize() const {
+      return config_.entry_size;
+    }
+
+    /**
+     * Gets the inital @ref Block::ValidSize "valid size" for @ref Block "blocks" in this chain.
+     *
+     * @see Block::ValidSize
+     */
+    std::size_t BlockSize() const {
+      return block_size_;
+    }
+
+    /**
+     * Number of blocks going through the Chain.
+     */
+    std::size_t BlockCount() const {
+      return config_.block_count;
+    }
+
+    /** Two ways to add to the chain: Add() or operator>>. */
+    ChainPosition Add();
+
+    /**
+     * Adds a new worker to this chain,
+     * and runs that worker in a new Thread owned by this chain.
+     *
+     * The worker must have a Run method that accepts a position argument.
+     *
+     * @see Thread::operator()()
+     */
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));
+      return *this;
+    }
+
+    /**
+     * Adds a new worker to this chain (but avoids copying that worker),
+     * and runs that worker in a new Thread owned by this chain.
+     *
+     * The worker must have a Run method that accepts a position argument.
+     *
+     * @see Thread::operator()()
+     */
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));  // The wrapper, not the worker itself, is handed to the Thread.
+      return *this;
+    }
+
+    // Note that Link and Stream also define operator>> outside this class.
+
+    // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.
+    void CompleteLoop() {
+      threads_.push_back(new Thread(Complete(), kRecycle));  // Runs kRecycle at the position returned by Complete().
+    }
+
+    /**
+     * Adds a Recycler worker to this chain,
+     * and runs that worker in a new Thread owned by this chain.
+     */
+    Chain &operator>>(const Recycler &) {
+      CompleteLoop();  // The argument is unused: CompleteLoop() always passes kRecycle.
+      return *this;
+    }
+
+    /**
+     * Adds a WriteAndRecycle worker to this chain,
+     * and runs that worker in a new Thread owned by this chain.
+     */
+    Chain &operator>>(const WriteAndRecycle &writer);
+    Chain &operator>>(const PWriteAndRecycle &writer);
+
+    // Chains are reusable.  Call Wait to wait for everything to finish and free memory.
+    void Wait(bool release_memory = true);
+
+    // Waits for the current chain to complete (if any) then starts again.
+    void Start();
+
+    bool Running() const { return !queues_.empty(); }  // True while the chain holds any queues.
+
+  private:
+    ChainPosition Complete();  // Used by CompleteLoop() to obtain the position for the recycling thread.
+
+    ChainConfig config_;  // Source of EntrySize() and BlockCount().
+
+    std::size_t block_size_;  // Returned by BlockSize().
+
+    scoped_malloc memory_;
+
+    boost::ptr_vector<PCQueue<Block> > queues_;  // Non-empty exactly when Running() is true.
+
+    bool complete_called_;  // Asserted false before operator>> adds a worker.
+
+    boost::ptr_vector<Thread> threads_;  // Threads created by operator>> and CompleteLoop().
+
+    MultiProgress progress_;  // Driven by ActivateProgress() and SetProgressTarget().
+};
+
+// Create the link in the worker thread using the position token.
+/**
+ * Represents a C++ style iterator over @ref Block "blocks".
+ */
+class Link {
+  public:
+
+    // Either default construct and Init or just construct all at once.
+
+    /**
+     * Constructs an @ref Init "initialized" link.
+     *
+     * @see Init
+     */
+    explicit Link(const ChainPosition &position);
+
+    /**
+     * Constructs a link that must subsequently be @ref Init "initialized".
+     *
+     * @see Init
+     */
+    Link();
+
+    /**
+     * Initializes the link with the input @ref PCQueue "consumer queue" and output @ref PCQueue "producer queue" at a given @ref ChainPosition "position" in the @ref Chain "chain".
+     *
+     * @see Link()
+     */
+    void Init(const ChainPosition &position);
+
+    /**
+     * Destructs the link object.
+     *
+     * If necessary, this method will pass a poison block
+     * to this link's output @ref PCQueue "producer queue".
+     *
+     * @see Block::SetToPoison()
+     */
+    ~Link();
+
+    /**
+     * Gets a reference to the @ref Block "block" at this link.
+     */
+    Block &operator*() { return current_; }
+
+    /**
+     * Gets a const reference to the @ref Block "block" at this link.
+     */
+    const Block &operator*() const { return current_; }
+
+    /**
+     * Gets a pointer to the @ref Block "block" at this link.
+     */
+    Block *operator->() { return &current_; }
+
+    /**
+     * Gets a const pointer to the @ref Block "block" at this link.
+     */
+    const Block *operator->() const { return &current_; }
+
+    /**
+     * Gets the link at the next @ref ChainPosition "position" in the @ref Chain "chain".
+     */
+    Link &operator++();
+
+    /**
+     * Returns true if the @ref Block "block" at this link encapsulates a valid (non-NULL) block of memory.
+     *
+     * This method is a user-defined implicit conversion function to boolean;
+     * among other things, this method enables bare instances of this class
+     * to be used as the condition of an if statement.
+     */
+    operator bool() const { return current_; }  // Relies on Block itself being convertible to bool.
+
+    /**
+     * @ref Block::SetToPoison() "Poisons" the @ref Block "block" at this link,
+     * and passes this now-poisoned block to this link's output @ref PCQueue "producer queue".
+     *
+     * @see Block::SetToPoison()
+     */
+    void Poison();
+
+  private:
+    Block current_;  // The block exposed by operator* and operator->.
+    PCQueue<Block> *in_, *out_;  // Input (consumer) and output (producer) queues; see Init().
+
+    bool poisoned_;  // NOTE(review): presumably records that Poison() already ran; confirm in chain.cc.
+
+    WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, Link &link) {  // Lets "chain >> link" attach an existing Link.
+  link.Init(chain.Add());  // Initialize the link at the position returned by Add().
+  return chain;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_CHAIN_H
diff --git a/mosesdecoder/util/stream/count_records.hh b/mosesdecoder/util/stream/count_records.hh
new file mode 100644
index 0000000000000000000000000000000000000000..e3f7c94afbc044a91dfac393febbe20ead5a8f9d
--- /dev/null
+++ b/mosesdecoder/util/stream/count_records.hh
@@ -0,0 +1,20 @@
+#ifndef UTIL_STREAM_COUNT_RECORDS_H
+#define UTIL_STREAM_COUNT_RECORDS_H
+#include <stdint.h>
+namespace util { namespace stream {
+
+class ChainPosition;
+
+// Chain worker writing through a caller-supplied counter (presumably a record count; Run is defined elsewhere).
+class CountRecords {
+  public:
+    // out must be non-NULL: it is dereferenced and zeroed here, before Run is ever called.
+    explicit CountRecords(uint64_t *out) : count_(out) { *count_ = 0; }
+
+    void Run(const ChainPosition &position);
+
+  private:
+    uint64_t *count_;  // Supplied by the caller; never freed in this header.
+};
+}} // namespaces
+#endif // UTIL_STREAM_COUNT_RECORDS_H
diff --git a/mosesdecoder/util/stream/io.cc b/mosesdecoder/util/stream/io.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c272d779c9b5adee651603051c7be04d2938a78c
--- /dev/null
+++ b/mosesdecoder/util/stream/io.cc
@@ -0,0 +1,78 @@
+#include "util/stream/io.hh"
+
+#include "util/file.hh"
+#include "util/stream/chain.hh"
+
+#include <cstddef>
+
+namespace util {
+namespace stream {
+
+ReadSizeException::ReadSizeException() throw() {}  // No state of its own beyond util::Exception.
+ReadSizeException::~ReadSizeException() throw() {}
+
+void Read::Run(const ChainPosition &position) {
+  const std::size_t entry_size = position.GetChain().EntrySize();
+  const std::size_t capacity = position.GetChain().BlockSize();
+  for (Link block(position); block; ++block) {
+    const std::size_t got = util::ReadOrEOF(file_, block->Get(), capacity);
+    UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << ".");
+    if (!got) {
+      // End of file: send poison down the chain and stop.
+      block.Poison();
+      return;
+    }
+    block->SetValidSize(got);
+  }
+}
+
+void PRead::Run(const ChainPosition &position) {  // Fills blocks from file_ at explicit offsets, then poisons.
+  scoped_fd owner;
+  if (own_) owner.reset(file_);  // When owning, hand file_ to a scoped_fd for the duration of Run.
+  const uint64_t size = SizeOrThrow(file_);
+  UTIL_THROW_IF(size % static_cast<uint64_t>(position.GetChain().EntrySize()), ReadSizeException, "File size " << file_ << " size is " << size << " not a multiple of " << position.GetChain().EntrySize());
+  const std::size_t block_size = position.GetChain().BlockSize();
+  const uint64_t block_size64 = static_cast<uint64_t>(block_size);
+  Link link(position);
+  uint64_t offset = 0;
+  for (; offset + block_size64 < size; offset += block_size64, ++link) {  // Every full block before the final one.
+    ErsatzPRead(file_, link->Get(), block_size, offset);
+    link->SetValidSize(block_size);
+  }
+  // size - offset is <= block_size, so it casts to 32-bit fine.
+  if (size - offset) {  // Final block: partial, or exactly full when size is a multiple of block_size.
+    ErsatzPRead(file_, link->Get(), size - offset, offset);
+    link->SetValidSize(size - offset);
+    ++link;
+  }
+  link.Poison();  // Always reached, including for an empty file.
+}
+
+void Write::Run(const ChainPosition &position) {
+  // Each block's valid bytes are written to file_ in arrival order.
+  Link block(position);
+  for (; block; ++block) WriteOrThrow(file_, block->Get(), block->ValidSize());
+}
+
+void WriteAndRecycle::Run(const ChainPosition &position) {  // Writes each block to file_, then resets it for reuse.
+  const std::size_t block_size = position.GetChain().BlockSize();
+  for (Link link(position); link; ++link) {
+    WriteOrThrow(file_, link->Get(), link->ValidSize());
+    link->SetValidSize(block_size);  // Restore the chain's initial valid size before the block moves on.
+  }
+}
+
+void PWriteAndRecycle::Run(const ChainPosition &position) {  // Positional-write variant of WriteAndRecycle::Run.
+  const std::size_t block_size = position.GetChain().BlockSize();
+  uint64_t offset = 0;  // Running total of bytes written; always starts at the beginning of the file.
+  for (Link link(position); link; ++link) {
+    ErsatzPWrite(file_, link->Get(), link->ValidSize(), offset);
+    offset += link->ValidSize();
+    link->SetValidSize(block_size);  // Restore the chain's initial valid size before the block moves on.
+  }
+  // Trim file to size.
+  util::ResizeOrThrow(file_, offset);
+}
+
+} // namespace stream
+} // namespace util
diff --git a/mosesdecoder/util/stream/io.hh b/mosesdecoder/util/stream/io.hh
new file mode 100644
index 0000000000000000000000000000000000000000..4605a8a79c1313dffb5186c3adb3e394070e63d1
--- /dev/null
+++ b/mosesdecoder/util/stream/io.hh
@@ -0,0 +1,87 @@
+#ifndef UTIL_STREAM_IO_H
+#define UTIL_STREAM_IO_H
+
+#include "util/exception.hh"
+#include "util/file.hh"
+
+namespace util {
+namespace stream {
+
+class ChainPosition;
+
+class ReadSizeException : public util::Exception {  // Thrown when input size is not a multiple of the entry size.
+  public:
+    ReadSizeException() throw();
+    ~ReadSizeException() throw();
+};
+
+class Read {  // Chain worker that fills blocks by reading sequentially from a file descriptor.
+  public:
+    explicit Read(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;  // Stored as a plain int; this class declares no destructor to close it.
+};
+
+// Like Read but uses pread so that the file can be accessed from multiple threads.
+class PRead {
+  public:
+    explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+    bool own_;  // If true, Run() hands file_ to a scoped_fd.
+};
+
+class Write {  // Chain worker that writes each block's valid bytes to a file descriptor.
+  public:
+    explicit Write(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;  // Stored as a plain int; this class declares no destructor to close it.
+};
+
+// It's a common case that stuff is written and then recycled.  So rather than
+// spawn another thread to Recycle, this combines the two roles.
+class WriteAndRecycle {
+  public:
+    explicit WriteAndRecycle(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;  // Stored as a plain int; this class declares no destructor to close it.
+};
+
+class PWriteAndRecycle {  // WriteAndRecycle variant that writes at explicit offsets and resizes the file when done.
+  public:
+    explicit PWriteAndRecycle(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;  // Stored as a plain int; this class declares no destructor to close it.
+};
+
+
+// Reuse the same file over and over again to buffer output.
+class FileBuffer {
+  public:
+    explicit FileBuffer(int fd) : file_(fd) {}  // file_ is a scoped_fd, so the buffer holds fd from here on.
+
+    PWriteAndRecycle Sink() const {  // Worker that writes into the buffer file; seeks to offset 0 first.
+      util::SeekOrThrow(file_.get(), 0);
+      return PWriteAndRecycle(file_.get());
+    }
+
+    PRead Source(bool discard = false) {  // Worker that reads the buffer file back.
+      return PRead(discard ? file_.release() : file_.get(), discard);  // With discard, the fd is released to the PRead.
+    }
+
+    uint64_t Size() const {  // Current size of the buffer file as reported by SizeOrThrow.
+      return SizeOrThrow(file_.get());
+    }
+
+  private:
+    scoped_fd file_;
+};
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_IO_H
diff --git a/mosesdecoder/util/stream/line_input.cc b/mosesdecoder/util/stream/line_input.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0ad8800f614dd5bf4e332e49c0896a56e87ccd2e
--- /dev/null
+++ b/mosesdecoder/util/stream/line_input.cc
@@ -0,0 +1,52 @@
+#include "util/stream/line_input.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+#include "util/stream/chain.hh"
+
+#include <algorithm>
+#include <vector>
+
+namespace util { namespace stream {
+
+void LineInput::Run(const ChainPosition &position) {  // Reads fd_ into blocks that each end on a line boundary.
+  ReadCompressed reader(fd_);
+  // Holding area for beginning of line to be placed in next block.
+  std::vector<char> carry;
+
+  for (Link block(position); ; ++block) {
+    char *to = static_cast<char*>(block->Get());
+    char *begin = to;
+    char *end = to + position.GetChain().BlockSize();
+    std::copy(carry.begin(), carry.end(), to);  // Start with the partial line left over from the previous block.
+    to += carry.size();
+    while (to != end) {
+      std::size_t got = reader.Read(to, end - to);
+      if (!got) {
+        // EOF: emit what was gathered (including any unterminated last line), then poison.
+        block->SetValidSize(to - begin);
+        ++block;
+        block.Poison();
+        return;
+      }
+      to += got;
+    }
+
+    // Find the last newline.
+    char *newline;
+    for (newline = to - 1; ; --newline) {
+      UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ".  Is this a text file?");
+      if (*newline == '\n') break;
+    }
+
+    // Copy everything after the last newline to the carry.  assign() replaces
+    // the old clear() + resize() + copy through &*carry.begin(), which
+    // dereferenced begin() of an empty vector whenever the block ended in '\n'.
+    carry.assign(newline + 1, to);
+
+    block->SetValidSize(newline + 1 - begin);
+  }
+}
+
+}} // namespaces
diff --git a/mosesdecoder/util/stream/line_input.hh b/mosesdecoder/util/stream/line_input.hh
new file mode 100644
index 0000000000000000000000000000000000000000..a870a6648494775d7c1169e17e2b0a375e984803
--- /dev/null
+++ b/mosesdecoder/util/stream/line_input.hh
@@ -0,0 +1,22 @@
+#ifndef UTIL_STREAM_LINE_INPUT_H
+#define UTIL_STREAM_LINE_INPUT_H
+namespace util {namespace stream {
+
+class ChainPosition;
+
+/* Worker that reads input into blocks, ensuring that blocks contain whole
+ * lines.  Assumes that the maximum size of a line is less than the block size
+ */
+class LineInput {
+  public:
+    // Takes ownership upon thread execution.  Defined inline because line_input.cc has no out-of-line definition.
+    explicit LineInput(int fd) : fd_(fd) {}
+
+    void Run(const ChainPosition &position);
+
+  private:
+    int fd_;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_LINE_INPUT_H
diff --git a/mosesdecoder/util/stream/multi_progress.cc b/mosesdecoder/util/stream/multi_progress.cc
new file mode 100644
index 0000000000000000000000000000000000000000..59750f516a0e9c60707f06064a8238f41ebd5072
--- /dev/null
+++ b/mosesdecoder/util/stream/multi_progress.cc
@@ -0,0 +1,86 @@
+#include "util/stream/multi_progress.hh"
+
+// TODO: merge some functionality with the simple progress bar?
+#include "util/ersatz_progress.hh"
+
+#include <iostream>
+#include <limits>
+
+#include <cstring>
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <unistd.h>
+#endif
+
+namespace util { namespace stream {
+
+namespace {
+const char kDisplayCharacters[] = "-+*#0123456789";
+
+uint64_t Next(unsigned char stone, uint64_t complete) {  // ceil((stone + 1) * complete / kWidth)
+  return (static_cast<uint64_t>(stone + 1) * complete + MultiProgress::kWidth - 1) / MultiProgress::kWidth;
+}
+
+} // namespace
+
+MultiProgress::MultiProgress() : active_(false), complete_(std::numeric_limits<uint64_t>::max()), character_handout_(0) {}  // complete_ == max() means "no target set".
+
+MultiProgress::~MultiProgress() {
+  if (active_ && complete_ != std::numeric_limits<uint64_t>::max())  // A target is set and Finished() has not reset it.
+    std::cerr << '\n';
+}
+
+void MultiProgress::Activate() {  // Requests display; outside Windows this only takes effect if fd 2 is a terminal.
+  active_ =
+#if !defined(_WIN32) && !defined(_WIN64)
+    // Is stderr a terminal?
+    (isatty(2) == 1)
+#else
+    true
+#endif
+    ;
+}
+
+void MultiProgress::SetTarget(uint64_t complete) {  // Starts a fresh bar; does nothing unless Activate() enabled display.
+  if (!active_) return;
+  complete_ = complete;
+  if (!complete) complete_ = 1;  // Milestone() divides by complete_, so it must not be zero.
+  memset(display_, 0, sizeof(display_));
+  character_handout_ = 0;
+  std::cerr << kProgressBanner;
+}
+
+WorkerProgress MultiProgress::Add() {  // Creates the progress handle for one worker.
+  if (!active_)
+    return WorkerProgress(std::numeric_limits<uint64_t>::max(), *this, '\0');  // next_ of max(): no milestone in practice.
+  std::size_t character_index;
+  {
+    boost::unique_lock<boost::mutex> lock(mutex_);
+    character_index = character_handout_++;
+    if (character_handout_ == sizeof(kDisplayCharacters) - 1)  // Wrap before the terminating '\0'.
+      character_handout_ = 0;
+  }
+  return WorkerProgress(Next(0, complete_), *this, kDisplayCharacters[character_index]);
+}
+
+void MultiProgress::Finished() {  // Ends the current bar with a newline and clears the target.
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  std::cerr << '\n';
+  complete_ = std::numeric_limits<uint64_t>::max();  // Back to "no target set", so later milestones are ignored.
+}
+
+void MultiProgress::Milestone(WorkerProgress &worker) {  // Repaints the bar when a worker crosses its next_ count.
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), worker.current_ * kWidth / complete_);
+  worker.next_ = Next(stone, complete_);
+  // display_ is shared by every worker, so both painting this worker's
+  // segment and printing the bar must happen under the mutex.
+  boost::unique_lock<boost::mutex> lock(mutex_);
+  for (char *i = &display_[worker.stone_]; i < &display_[stone]; ++i) {
+    *i = worker.character_;
+  }
+  worker.stone_ = stone;
+  std::cerr << '\r' << display_ << std::flush;
+}
+
+}} // namespaces
diff --git a/mosesdecoder/util/stream/multi_progress.hh b/mosesdecoder/util/stream/multi_progress.hh
new file mode 100644
index 0000000000000000000000000000000000000000..f9e6423e3e32773b9e19810691fb6562bd2d3a62
--- /dev/null
+++ b/mosesdecoder/util/stream/multi_progress.hh
@@ -0,0 +1,89 @@
+/* Progress bar suitable for chains of workers */
+#ifndef UTIL_STREAM_MULTI_PROGRESS_H
+#define UTIL_STREAM_MULTI_PROGRESS_H
+
+#include <boost/thread/mutex.hpp>
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util { namespace stream {
+
+class WorkerProgress;
+
+class MultiProgress {
+  public:
+    static const unsigned char kWidth = 100;  // Number of character cells in the bar.
+
+    MultiProgress();
+
+    ~MultiProgress();
+
+    // Turns on showing (requires SetTarget too).
+    void Activate();
+
+    void SetTarget(uint64_t complete);
+
+    WorkerProgress Add();
+
+    void Finished();
+
+  private:
+    friend class WorkerProgress;
+    void Milestone(WorkerProgress &worker);  // Called by WorkerProgress once current_ reaches next_.
+
+    bool active_;
+
+    uint64_t complete_;  // Target count; the uint64_t maximum means no target is set.
+
+    boost::mutex mutex_;
+
+    // \0 at the end.
+    char display_[kWidth + 1];
+
+    std::size_t character_handout_;  // Index of the next display character that Add() hands out.
+
+    MultiProgress(const MultiProgress &);  // Declared private and not defined in this header: copying is disabled.
+    MultiProgress &operator=(const MultiProgress &);
+};
+
+class WorkerProgress {
+  public:
+    // Default constructor: inert until assigned with operator=; next_ is the maximum so no milestone fires in practice.
+    WorkerProgress() : current_(0), next_(~static_cast<uint64_t>(0)), parent_(NULL), stone_(0), character_('\0') {}
+
+    // Not threadsafe for the same worker by default.
+    WorkerProgress &operator++() {
+      if (++current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+    WorkerProgress &operator+=(uint64_t amount) {
+      current_ += amount;
+      if (current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+  private:
+    friend class MultiProgress;
+    WorkerProgress(uint64_t next, MultiProgress &parent, char character)
+      : current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
+
+    uint64_t current_, next_;  // Count so far, and the count at which Milestone is next called.
+
+    MultiProgress *parent_;
+
+    // Previous milestone reached.
+    unsigned char stone_;
+
+    // Character to display in bar.
+    char character_;
+};
+
+}} // namespaces
+
+#endif // UTIL_STREAM_MULTI_PROGRESS_H
diff --git a/mosesdecoder/util/stream/multi_stream.hh b/mosesdecoder/util/stream/multi_stream.hh
new file mode 100644
index 0000000000000000000000000000000000000000..6381fc2ed659705da65355544616c6c859327de3
--- /dev/null
+++ b/mosesdecoder/util/stream/multi_stream.hh
@@ -0,0 +1,124 @@
+#ifndef UTIL_STREAM_MULTI_STREAM_H
+#define UTIL_STREAM_MULTI_STREAM_H
+
+#include "util/fixed_array.hh"
+#include "util/scoped.hh"
+#include "util/stream/chain.hh"
+#include "util/stream/stream.hh"
+
+#include <cstddef>
+#include <new>
+
+#include <cassert>
+#include <cstdlib>
+
+namespace util { namespace stream {
+
+class Chains;
+
+class ChainPositions : public util::FixedArray<util::stream::ChainPosition> {  // One ChainPosition per chain of a Chains.
+  public:
+    ChainPositions() {}
+
+    explicit ChainPositions(std::size_t bound) :
+      util::FixedArray<util::stream::ChainPosition>(bound) {}
+
+    void Init(Chains &chains);  // Defined after Chains, whose members it needs.
+
+    explicit ChainPositions(Chains &chains) {
+      Init(chains);
+    }
+};
+
+class Chains : public util::FixedArray<util::stream::Chain> {
+  private:
+    template <class T, void (T::*ptr)(const ChainPositions &) = &T::Run> struct CheckForRun {  // Only well-formed if T has Run(const ChainPositions &).
+      typedef Chains type;
+    };
+
+  public:
+    // Must call Init.
+    Chains() {}
+
+    explicit Chains(std::size_t limit) : util::FixedArray<util::stream::Chain>(limit) {}
+
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+      threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));  // One thread gets a position on every chain.
+      return *this;
+    }
+
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+      threads_.push_back(new util::stream::Thread(ChainPositions(*this), worker));
+      return *this;
+    }
+
+    Chains &operator>>(const util::stream::Recycler &recycler) {  // Applies >> recycler to each chain in turn.
+      for (util::stream::Chain *i = begin(); i != end(); ++i)
+        *i >> recycler;
+      return *this;
+    }
+
+    void Wait(bool release_memory = true) {
+      threads_.clear();  // Destroys the multi-chain Thread objects before waiting on each chain.
+      for (util::stream::Chain *i = begin(); i != end(); ++i) {
+        i->Wait(release_memory);
+      }
+    }
+
+  private:
+    boost::ptr_vector<util::stream::Thread> threads_;  // Threads for workers that span all chains.
+
+    Chains(const Chains &);  // Declared private and not defined in this header: copying is disabled.
+    void operator=(const Chains &);
+};
+
+inline void ChainPositions::Init(Chains &chains) {  // Adds one position to every chain, stored in chain order.
+  util::FixedArray<util::stream::ChainPosition>::Init(chains.size());
+  for (util::stream::Chain *i = chains.begin(); i != chains.end(); ++i) {
+    // use "placement new" syntax to initialize ChainPosition in an already-allocated memory location
+    new (end()) util::stream::ChainPosition(i->Add()); Constructed();
+  }
+}
+
+inline Chains &operator>>(Chains &chains, ChainPositions &positions) {  // Lets "chains >> positions" fill positions.
+  positions.Init(chains);
+  return chains;
+}
+
+template <class T> class GenericStreams : public util::FixedArray<T> {  // Fixed array of T, each built from a ChainPosition.
+  private:
+    typedef util::FixedArray<T> P;
+  public:
+    GenericStreams() {}
+
+    // Limit restricts to positions[0,limit)
+    void Init(const ChainPositions &positions, std::size_t limit) {  // limit is not checked against positions.size().
+      P::Init(limit);
+      for (const util::stream::ChainPosition *i = positions.begin(); i != positions.begin() + limit; ++i) {
+        P::push_back(*i);
+      }
+    }
+    void Init(const ChainPositions &positions) {
+      Init(positions, positions.size());
+    }
+
+    GenericStreams(const ChainPositions &positions) {
+      Init(positions);
+    }
+
+    void Init(std::size_t amount) {  // Forwards to FixedArray::Init only; no element is pushed here.
+      P::Init(amount);
+    }
+};
+
+template <class T> inline Chains &operator>>(Chains &chains, GenericStreams<T> &streams) {  // Lets "chains >> streams" initialize streams.
+  ChainPositions positions;
+  chains >> positions;  // Adds a position to every chain.
+  streams.Init(positions);
+  return chains;
+}
+
+typedef GenericStreams<Stream> Streams;
+
+}} // namespaces
+#endif // UTIL_STREAM_MULTI_STREAM_H
diff --git a/mosesdecoder/util/stream/rewindable_stream.hh b/mosesdecoder/util/stream/rewindable_stream.hh
new file mode 100644
index 0000000000000000000000000000000000000000..560825cde974cfac4e98a870d260a5fd481075fc
--- /dev/null
+++ b/mosesdecoder/util/stream/rewindable_stream.hh
@@ -0,0 +1,132 @@
+#ifndef UTIL_STREAM_REWINDABLE_STREAM_H
+#define UTIL_STREAM_REWINDABLE_STREAM_H
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+#include <deque>
+
+namespace util {
+namespace stream {
+
+/**
+ * A RewindableStream is like a Stream (but one that is only used for
+ * creating input at the start of a chain) except that it can be rewound to
+ * be able to re-write a part of the stream before it is sent. Rewinding
+ * has a limit of 2 * block_size_ - 1 in distance (it does *not* buffer an
+ * entire stream into memory, only a maximum of 2 * block_size_).
+ */
+class RewindableStream : boost::noncopyable {
+  public:
+    /**
+     * Creates an uninitialized RewindableStream. You **must** call Init()
+     * on it later!
+     */
+    RewindableStream();
+
+    ~RewindableStream() {
+      Poison();  // Destruction always poisons the stream.
+    }
+
+    /**
+     * Initializes an existing RewindableStream at a specific position in
+     * a Chain.
+     *
+     * @param position The position in the chain to get input from and
+     *  produce output on
+     */
+    void Init(const ChainPosition &position);
+
+    /**
+     * Constructs a RewindableStream at a specific position in a Chain all
+     * in one step.
+     *
+     * Equivalent to RewindableStream a; a.Init(....);
+     */
+    explicit RewindableStream(const ChainPosition &position)
+      : in_(NULL) {
+      Init(position);
+    }
+
+    /**
+     * Gets the record at the current stream position. Const version.
+     */
+    const void *Get() const {
+      assert(!poisoned_);
+      assert(current_);
+      return current_;
+    }
+
+    /**
+     * Gets the record at the current stream position.
+     */
+    void *Get() {
+      assert(!poisoned_);
+      assert(current_);
+      return current_;
+    }
+
+    operator bool() const { return !poisoned_; }  // True until the current position is poison.
+
+    bool operator!() const { return poisoned_; }
+
+    /**
+     * Marks the current position in the stream to be rewound to later.
+     * Note that you can only rewind back as far as 2 * block_size_ - 1!
+     */
+    void Mark();
+
+    /**
+     * Rewinds the stream back to the marked position. This will throw an
+     * exception if the marked position is too far away.
+     */
+    void Rewind();
+
+    /**
+     * Moves the stream forward to the next record. This internally may
+     * buffer a block for the purposes of rewinding.
+     */
+    RewindableStream& operator++();
+
+    /**
+     * Poisons the stream. This sends any buffered blocks down the chain
+     * and sends a poison block as well (sending at most 2 non-poison and 1
+     * poison block).
+     */
+    void Poison();
+
+  private:
+    void AppendBlock();
+
+    void Flush(std::deque<Block>::iterator to);
+
+    std::deque<Block> blocks_;
+    // current_ is in blocks_[blocks_it_] unless poisoned_.
+    std::size_t blocks_it_;
+
+    std::size_t entry_size_;
+    std::size_t block_size_;
+    std::size_t block_count_;
+
+    uint8_t *marked_, *current_;
+    const uint8_t *block_end_;
+
+    PCQueue<Block> *in_, *out_;
+
+    // Have we hit poison at the end of the stream, even if rewinding?
+    bool hit_poison_;
+    // Is the current position poison?
+    bool poisoned_;
+
+    WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, RewindableStream &stream) {  // Lets "chain >> stream" attach an existing stream.
+  stream.Init(chain.Add());  // Initialize the stream at the position returned by Add().
+  return chain;
+}
+
+}
+}
+#endif
diff --git a/mosesdecoder/util/stream/rewindable_stream_test.cc b/mosesdecoder/util/stream/rewindable_stream_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f8924c3c74db03e4107b1d8132cf8336cf8f50c2
--- /dev/null
+++ b/mosesdecoder/util/stream/rewindable_stream_test.cc
@@ -0,0 +1,41 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/rewindable_stream.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE RewindableStreamTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace stream {
+namespace {
+
+BOOST_AUTO_TEST_CASE(RewindableStreamTest) {  // Streams 0..99999 from a temp file, marks near the end, then rewinds.
+  scoped_fd in(MakeTemp("io_test_temp"));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);  // Back to the start so that Read sees everything just written.
+
+  ChainConfig config;
+  config.entry_size = 8;  // sizeof(uint64_t)
+  config.total_memory = 100;
+  config.block_count = 6;
+
+  Chain chain(config);
+  RewindableStream s;
+  chain >> Read(in.get()) >> s >> kRecycle;
+  uint64_t i = 0;
+  for (; s; ++s, ++i) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
+    if (100000UL - i == 2)  // Mark the second-to-last record (value 99998).
+      s.Mark();
+  }
+  BOOST_CHECK_EQUAL(100000ULL, i);
+  s.Rewind();
+  BOOST_CHECK_EQUAL(100000ULL - 2, *static_cast<const uint64_t*>(s.Get()));  // Rewinding past the end still works.
+}
+
+}
+}
+}
diff --git a/mosesdecoder/util/stream/sort_test.cc b/mosesdecoder/util/stream/sort_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fc97ffdbfea20507f9d7f1d3a5a833e6cdc22e07
--- /dev/null
+++ b/mosesdecoder/util/stream/sort_test.cc
@@ -0,0 +1,62 @@
+#include "util/stream/sort.hh"
+
+#define BOOST_TEST_MODULE SortTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+struct CompareUInt64 : public std::binary_function<const void *, const void *, bool> {  // Orders records as uint64_t values.
+  bool operator()(const void *first, const void *second) const {
+    return *static_cast<const uint64_t*>(first) < *static_cast<const uint64_t*>(second);  // Same cast on both operands.
+  }
+};
+
+const uint64_t kSize = 100000;
+
+struct Putter {  // Test worker that feeds the contents of a vector into the chain, one uint64_t per record.
+  Putter(std::vector<uint64_t> &shuffled) : shuffled_(shuffled) {}
+
+  void Run(const ChainPosition &position) {
+    Stream put_shuffled(position);
+    for (uint64_t i = 0; i < shuffled_.size(); ++i, ++put_shuffled) {
+      *static_cast<uint64_t*>(put_shuffled.Get()) = shuffled_[i];
+    }
+    put_shuffled.Poison();  // Signal end of input.
+  }
+  std::vector<uint64_t> &shuffled_;  // Held by reference: the vector must outlive the worker.
+};
+
+BOOST_AUTO_TEST_CASE(FromShuffled) {  // Sorts a shuffled 0..kSize-1 sequence and checks it comes back in order.
+  std::vector<uint64_t> shuffled;
+  shuffled.reserve(kSize);
+  for (uint64_t i = 0; i < kSize; ++i) {
+    shuffled.push_back(i);
+  }
+  std::random_shuffle(shuffled.begin(), shuffled.end());
+
+  ChainConfig config;
+  config.entry_size = 8;  // sizeof(uint64_t)
+  config.total_memory = 800;
+  config.block_count = 3;
+
+  SortConfig merge_config;
+  merge_config.temp_prefix = "sort_test_temp";
+  merge_config.buffer_size = 800;
+  merge_config.total_memory = 3300;
+
+  Chain chain(config);
+  chain >> Putter(shuffled);
+  BlockingSort(chain, merge_config, CompareUInt64(), NeverCombine());
+  Stream sorted;
+  chain >> sorted >> kRecycle;
+  for (uint64_t i = 0; i < kSize; ++i, ++sorted) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(sorted.Get()));
+  }
+  BOOST_CHECK(!sorted);  // Exactly kSize records: the stream must be exhausted now.
+}
+
+}}} // namespaces
diff --git a/mosesdecoder/util/stream/timer.hh b/mosesdecoder/util/stream/timer.hh
new file mode 100644
index 0000000000000000000000000000000000000000..9e9573d15bed888b194459ffffb52b069429ab2f
--- /dev/null
+++ b/mosesdecoder/util/stream/timer.hh
@@ -0,0 +1,16 @@
+#ifndef UTIL_STREAM_TIMER_H
+#define UTIL_STREAM_TIMER_H
+
+// Sorry Jon, this was adding library dependencies in Moses and people complained, so UTIL_TIMER is now a no-op.
+
+/*#include <boost/version.hpp>
+
+#if BOOST_VERSION >= 104800
+#include <boost/timer/timer.hpp>
+#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
+#else
+//#warning Using Boost older than 1.48. Timing information will not be available.*/
+#define UTIL_TIMER(str)
+//#endif
+
+#endif // UTIL_STREAM_TIMER_H
diff --git a/mosesdecoder/util/string_piece.hh b/mosesdecoder/util/string_piece.hh
new file mode 100644
index 0000000000000000000000000000000000000000..4288086922c0f8ad1d4ef73332902e46875e5520
--- /dev/null
+++ b/mosesdecoder/util/string_piece.hh
@@ -0,0 +1,278 @@
+/* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n.  If
+ * you don't use ICU, then this will use the Google implementation from Chrome.
+ * This has been modified from the original version to let you choose.
+ */
+
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copied from strings/stringpiece.h with modifications
+//
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece.  The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+
+#ifndef UTIL_STRING_PIECE_H
+#define UTIL_STRING_PIECE_H
+
+#include "util/have.hh"
+
+#include <cstring>
+#include <iosfwd>
+#include <ostream>
+
+#ifdef HAVE_ICU
+#include <unicode/stringpiece.h>
+#include <unicode/uversion.h>
+
+// Old versions of ICU don't define operator== and operator!=.
+#if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4))
+#warning You are using an old version of ICU.  Consider upgrading to ICU >= 4.6.
+// Equality: both pieces have the same length and identical bytes.
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+  // Comparing lengths first means memcmp only ever sees equal-sized ranges.
+  const bool same_length = (x.size() == y.size());
+  return same_length && std::memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+// Inequality for StringPiece.
+// Defined as the negation of operator== so the two can never disagree.
+inline bool operator!=(const StringPiece& x, const StringPiece& y) { return !(x == y); }
+#endif // old version of ICU
+
+U_NAMESPACE_BEGIN
+
+inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) {  // does `longer` begin with `prefix`?
+  const int available = longer.size(), needed = prefix.size();
+  return !(available < needed) && std::memcmp(longer.data(), prefix.data(), needed) == 0;
+}
+
+#else
+
+#include <algorithm>
+#include <cstddef>
+#include <string>
+#include <cstring>
+
+#ifdef WIN32
+#undef max
+#undef min
+#endif
+
+class StringPiece {  // A pointer-plus-length view of a byte range; the class never allocates or frees that memory.
+ public:
+  typedef size_t size_type;
+
+ private:
+  const char*   ptr_;  // first byte of the viewed range (NULL after default construction or clear())
+  size_type     length_;  // number of bytes in the view
+
+ public:
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected.
+  StringPiece() : ptr_(NULL), length_(0) { }
+  StringPiece(const char* str)  // NUL-terminated C string; a NULL pointer gives length 0
+    : ptr_(str), length_((str == NULL) ? 0 : strlen(str)) { }
+  StringPiece(const std::string& str)  // views str's own buffer; nothing is copied
+    : ptr_(str.data()), length_(str.size()) { }
+  StringPiece(const char* offset, size_type len)  // explicit start pointer and byte count
+    : ptr_(offset), length_(len) { }
+
+  // data() may return a pointer to a buffer with embedded NULs, and the
+  // returned buffer may or may not be null terminated.  Therefore it is
+  // typically a mistake to pass data() to a routine that expects a NUL
+  // terminated string.
+  const char* data() const { return ptr_; }
+  size_type size() const { return length_; }
+  size_type length() const { return length_; }  // same value as size()
+  bool empty() const { return length_ == 0; }
+
+  void clear() { ptr_ = NULL; length_ = 0; }  // back to the default-constructed (NULL, 0) state
+  void set(const char* data, size_type len) { ptr_ = data; length_ = len; }
+  void set(const char* str) {  // NUL-terminated string; a NULL pointer gives length 0
+    ptr_ = str;
+    length_ = str ? strlen(str) : 0;
+  }
+  void set(const void* data, size_type len) {  // raw memory, viewed as chars
+    ptr_ = reinterpret_cast<const char*>(data);
+    length_ = len;
+  }
+
+  char operator[](size_type i) const { return ptr_[i]; }  // no bounds check on i
+
+  void remove_prefix(size_type n) {  // drops the first n bytes; n is not checked against length_
+    ptr_ += n;
+    length_ -= n;
+  }
+
+  void remove_suffix(size_type n) {  // drops the last n bytes; n is not checked against length_
+    length_ -= n;
+  }
+
+  int compare(const StringPiece& x) const {  // negative, zero or positive, like memcmp
+    int r = wordmemcmp(ptr_, x.ptr_, std::min(length_, x.length_));
+    if (r == 0) {  // the common prefix is equal, so the shorter piece orders first
+      if (length_ < x.length_) r = -1;
+      else if (length_ > x.length_) r = +1;
+    }
+    return r;
+  }
+
+  std::string as_string() const {
+    // std::string doesn't like to take a NULL pointer even with a 0 size.
+    return std::string(!empty() ? data() : "", size());
+  }
+
+  void CopyToString(std::string* target) const;  // declared only; the definition is not in this header section
+  void AppendToString(std::string* target) const;  // declared only; the definition is not in this header section
+
+  // Does "this" start with "x"
+  bool starts_with(const StringPiece& x) const {
+    return ((length_ >= x.length_) &&
+            (wordmemcmp(ptr_, x.ptr_, x.length_) == 0));
+  }
+
+  // Does "this" end with "x"
+  bool ends_with(const StringPiece& x) const {
+    return ((length_ >= x.length_) &&
+            (wordmemcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
+  }
+
+  // standard STL container boilerplate
+  typedef char value_type;
+  typedef const char* pointer;
+  typedef const char& reference;
+  typedef const char& const_reference;
+  typedef ptrdiff_t difference_type;
+  static const size_type npos;  // declared here; its value is defined elsewhere
+  typedef const char* const_iterator;
+  typedef const char* iterator;  // same type as const_iterator: the view is read-only
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  iterator begin() const { return ptr_; }
+  iterator end() const { return ptr_ + length_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(ptr_ + length_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(ptr_);
+  }
+
+  size_type max_size() const { return length_; }  // reports the current length
+  size_type capacity() const { return length_; }  // reports the current length
+
+  size_type copy(char* buf, size_type n, size_type pos = 0) const;
+
+  size_type find(const StringPiece& s, size_type pos = 0) const;  // search members are declared only; presumably std::string-like -- confirm against the definitions
+  size_type find(char c, size_type pos = 0) const;
+  size_type rfind(const StringPiece& s, size_type pos = npos) const;
+  size_type rfind(char c, size_type pos = npos) const;
+
+  size_type find_first_of(const StringPiece& s, size_type pos = 0) const;
+  size_type find_first_of(char c, size_type pos = 0) const {  // for a single char this forwards to find()
+    return find(c, pos);
+  }
+  size_type find_first_not_of(const StringPiece& s, size_type pos = 0) const;
+  size_type find_first_not_of(char c, size_type pos = 0) const;
+  size_type find_last_of(const StringPiece& s, size_type pos = npos) const;
+  size_type find_last_of(char c, size_type pos = npos) const {  // for a single char this forwards to rfind()
+    return rfind(c, pos);
+  }
+  size_type find_last_not_of(const StringPiece& s, size_type pos = npos) const;
+  size_type find_last_not_of(char c, size_type pos = npos) const;
+
+  StringPiece substr(size_type pos, size_type n = npos) const;
+
+  static int wordmemcmp(const char* p, const char* p2, size_type N) {  // thin wrapper over std::memcmp
+    return std::memcmp(p, p2, N);
+  }
+};
+
+// Two pieces are equal when they view the same number of identical bytes.
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+  // Short-circuit on a length mismatch so memcmp only runs on equal-sized views.
+  return x.size() == y.size() &&
+         std::memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+// Inequality for StringPiece.
+// Defined as the negation of operator== so the two can never disagree.
+inline bool operator!=(const StringPiece& x, const StringPiece& y) { return !(x == y); }
+
+// Free-function form of StringPiece::starts_with: true when `longer` begins with `prefix`.
+inline bool starts_with(const StringPiece& longer, const StringPiece& prefix)
+{ return longer.starts_with(prefix); }
+
+#endif // HAVE_ICU undefined
+
+// Lexicographic byte order; when one piece is a prefix of the other, the shorter sorts first.
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  const int order = std::memcmp(x.data(), y.data(), std::min(x.size(), y.size()));
+  return order != 0 ? order < 0 : x.size() < y.size();
+}
+
+// x > y holds exactly when y < x, so the ordering is defined once, in operator<.
+inline bool operator>(const StringPiece& x, const StringPiece& y)
+{ return y < x; }
+
+// x <= y is the negation of x > y.
+inline bool operator<=(const StringPiece& x, const StringPiece& y)
+{ return !(x > y); }
+
+// x >= y is the negation of x < y.
+inline bool operator>=(const StringPiece& x, const StringPiece& y)
+{ return !(x < y); }
+
+// Returns str with leading and trailing dropChars removed; dropChars is bound by const reference so a caller-supplied string is not copied on every call.
+inline StringPiece Trim(const StringPiece& str, const std::string& dropChars = " \t\n\r") {
+  const StringPiece::size_type startPos = str.find_first_not_of(dropChars);
+  const StringPiece::size_type endPos = str.find_last_not_of(dropChars);
+  // NOTE(review): if every character is dropped, presumably both searches return npos and the result depends on how substr treats an out-of-range pos -- confirm.
+  return str.substr(startPos, endPos - startPos + 1);
+}
+
+// Allow StringPiece to be logged (needed for unit testing); writes exactly size() bytes of data(), so no NUL terminator is needed.
+inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
+  return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
+}
+
+#ifdef HAVE_ICU
+U_NAMESPACE_END
+using U_NAMESPACE_QUALIFIER StringPiece;
+#endif
+
+#endif  // UTIL_STRING_PIECE_H
diff --git a/mosesdecoder/util/string_stream.hh b/mosesdecoder/util/string_stream.hh
new file mode 100644
index 0000000000000000000000000000000000000000..28fdd4219900c500b6e7d7a89d9e01fbdc50f903
--- /dev/null
+++ b/mosesdecoder/util/string_stream.hh
@@ -0,0 +1,48 @@
+#ifndef UTIL_STRING_STREAM_H
+#define UTIL_STRING_STREAM_H
+
+#include "util/fake_ostream.hh"
+
+#include <cassert>
+#include <string>
+
+namespace util {
+
+// Output sink that accumulates everything written to it in a std::string.
+class StringStream : public FakeOStream<StringStream> {
+  public:
+    StringStream() {}
+
+    StringStream &flush() { return *this; }  // no-op: written bytes go straight into out_
+
+    StringStream &write(const void *data, std::size_t length) {  // appends length raw bytes from data
+      out_.append(static_cast<const char*>(data), length);
+      return *this;
+    }
+
+    const std::string &str() const { return out_; }  // text accumulated so far
+    void str(const std::string &val) { out_ = val; }  // replaces the accumulated text
+    void swap(std::string &str) { std::swap(out_, str); }  // exchanges contents with str
+
+  protected:
+    friend class FakeOStream<StringStream>;  // presumably the caller of Ensure/AdvanceTo -- confirm
+    char *Ensure(std::size_t amount) {  // grows out_ by amount bytes; returns a pointer to the first new byte
+      const std::size_t current = out_.size();
+      out_.resize(current + amount);
+      return &out_[current];
+    }
+
+    // Truncates out_ so that it ends at `to`, which must lie within [data(), data() + size()].
+    // Bounds are taken from data(): dereferencing end() (or begin() of an empty string) is undefined.
+    void AdvanceTo(char *to) {
+      assert(to >= out_.data() && to <= out_.data() + out_.size());
+      out_.resize(to - out_.data());
+    }
+
+  private:
+    std::string out_;
+};
+
+} // namespace
+
+#endif // UTIL_STRING_STREAM_H