sleepyhead111 committed on Apr 20, 2025

Commit

9543496

verified ·

1 Parent(s): 55f12b9

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

mosesdecoder/contrib/goshen-chrome/README.md +93 -0
mosesdecoder/contrib/goshen-chrome/icon.png +0 -0
mosesdecoder/contrib/goshen-chrome/manifest.json +29 -0
mosesdecoder/contrib/goshen-chrome/style.less +22 -0
mosesdecoder/contrib/lmserver/COPYING +33 -0
mosesdecoder/contrib/lmserver/Makefile.am +18 -0
mosesdecoder/contrib/lmserver/NEWS +0 -0
mosesdecoder/contrib/lmserver/config.h.in +97 -0
mosesdecoder/contrib/lmserver/config.status +1207 -0
mosesdecoder/contrib/lmserver/config.sub +1676 -0
mosesdecoder/contrib/lmserver/configure.ac +235 -0
mosesdecoder/contrib/lmserver/depcomp +589 -0
mosesdecoder/contrib/lmserver/lmserver.h +375 -0
mosesdecoder/contrib/lmserver/stamp-h1 +1 -0
mosesdecoder/contrib/mert-moses-multi.pl +1529 -0
mosesdecoder/contrib/relent-filter/src/IOWrapper.cpp +580 -0
mosesdecoder/contrib/relent-filter/src/Jamfile +6 -0
mosesdecoder/contrib/relent-filter/src/Main.h +39 -0
mosesdecoder/contrib/relent-filter/src/TranslationAnalysis.cpp +126 -0
mosesdecoder/contrib/relent-filter/src/mbr.h +28 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h +180 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-beam.h +164 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-const.h +125 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-crf.h +359 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h +479 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h +242 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-gauss.h +287 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hash.h +105 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-heap.h +181 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h +397 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-iomacros.h +63 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-list.h +481 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-matrix.h +177 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h +126 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-prob.h +136 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-racpt.h +332 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-randvar.h +593 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-refrv.h +74 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-timer.h +52 -0
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-tree.h +43 -0
mosesdecoder/contrib/zmert-moses.pl +1121 -0
mosesdecoder/moses/TranslationModel/UG/TargetPhraseCollectionCache.h +47 -0
mosesdecoder/moses/TranslationModel/UG/bitext-find.cc +151 -0
mosesdecoder/moses/TranslationModel/UG/check-coverage.cc +82 -0
mosesdecoder/moses/TranslationModel/UG/filter-pt.cc +669 -0
mosesdecoder/moses/TranslationModel/UG/ptable-describe-features.cc +40 -0
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h +39 -0
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_logcnt.h +62 -0
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_rareness.h +38 -0
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_wordcount.h +33 -0

mosesdecoder/contrib/goshen-chrome/README.md ADDED Viewed

	@@ -0,0 +1,93 @@

+# goshen
+Goshen is a Chrome extension that duplicates the utility of the Google Translate chrome extension for on-page website translation, using the Goshen JavaScript library with Moses as a backend translator. (It also has the ability to swap in an arbitrary translation engine, if the appropriate adapters are written.)
+## 1. The Goshen.js Library
+As Google Translate is the current go-to machine-translation system for developers, I intend to make Moses a viable alternative for even the non-savvy developer. This is in large part simplified by having an easily deployed (perhaps Dockerized) Moses server, as mentioned in the section above. However, it is also greatly simplified by exposing a comprehensive and well-formed JavaScript API that allows the same level of flexibility as the existing Google API.
+Instead of trying to duplicate the Google Translate API, I instead chose to write a wrapper for *any* translation engine. An engine with an exposed HTTP endpoint can be added to the Goshen translation library by implementing `GoshenAdapter`, for which I have provided a complete `moses-mt-server` implementation (`MosesGoshenAdapter`) and a partially complete proof of concept for Google Translate (`GoogleTranslateGoshenAdapter`). This is to illustrate that the engines can be used interchangeably for simple translation tasks, but the entirety of Moses functionality can be accessed whereas Google Translate's public API fails to accommodate some more technical tasks.
+The library is both commented and minified, available in the `goshenlib/` directory, [here](https://github.com/j6k4m8/goshen-moses). It is also possible to import the unminified, importable version from `goshenlib/dist`. The complete documentation, as well as usage examples and implementation explanations and justifications, are available in `goshenlib/docs` at the above repository.
+## 2. Chrome Extension
+This directory contains a Chrome extension that utilizes the CASMACAT moses-mt-server/Moses backend to provide a frontend website translation service. The extension automatically detects the relevant content of most articles or body-text on the page, and at the user's request, translates it to the requested language. Usage is explained below, as well as inside the extension popup after installation, for quick reference.
+### Usage
+1. **Install the unpacked extension.** Go to `chrome://extensions` and click <kbd>Load Unpacked Extension</kbd>. Navigate to this `goshen-chrome/` directory, and load.
+2. This adds a Goshen icon to your Chrome toolbar. Clicking it brings up a simple modal that allows the switching of languages.
+3. Use the <kbd>Alt</kbd>+<kbd>T</kbd> key-chord ("T" for "Translate") to begin text-selection. The Goshen-translate extension will highlight elements of text in cyan as you mouse over them. To translate the currently highlighted element, click it.
+## Goshen.js Documentation
+### Overview
+The Goshen library provides a web-developer-facing library for handling machine translation. It allows interaction with arbitrary machine translation services, agnostic of the technology or algorithm stack.
+### Usage
+A very brief tutorial is provided here:
+- Create a new Goshen object. Use the MosesGoshenAdapter, so that translations are handled by a Moses MT server.
+    ```JavaScript
+    g = new Goshen('localhost:3000', 'http', MosesGoshenAdapter);
+    ```
+- Use the Goshen object to pass a translation job to the Moses adapter. The adapter will pass back a completed translation once the job completes.
+    ```JavaScript
+    g.translate('This is a simple sentence.', Languages.ENGLISH, Languages.SPANISH);
+    ```
+- You can also optionally pass a callback function to the .translate method:
+    ```JavaScript
+    g.translate('This is a simple sentence.',
+                Languages.ENGLISH,
+                Languages.SPANISH,
+                function(err, val) {
+        if (!!err) {
+            console.warn("Encountered an error: " + err);
+        } else {
+            console.info("Translated to: " + val);
+        }
+    });
+    ```
+    If a callback is supplied, the function is run on a new thread, and is non-blocking. If one is not supplied, then the return value of the function contains the translated text. `undefined` is returned if the translation fails.
+### `Goshen`
+The generic class for a Goshen.js object, the object that handles translation with an arbitrary translation backend. In order to specify a backend, pass a `type` parameter to the constructor. (Default is Moses, of course!)
+- `Goshen`
+    - Arguments:
+        - `hostname`: A string hostname, such as `localhost:8000`. This is the base URL for formulating the RESTful API endpoint.
+        - `protocol`: The HTTP protocol. Either `http` or `https`.
+        - `type`: What type of GoshenAdapter to use. Options are currently `GoogleTranslateGoshenAdapter` or `MosesGoshenAdapter`.
+        - `opts`: A dictionary of options to pass to the adapter constructor. Currently, none are required for existing adapters.
+- function `url`
+    Generate a complete URI. If `hostname` is `localhost:8000` and `protocol` is `https`, then `this.url('foo')` returns `https://localhost:8000/foo`
+    - Arguments:
+        - `suffix`: A suffix to concatenate onto the end of a well-formed URI.
+    - Returns:
+        - String: The complete web-accessible URL.
+- function `translate`
+    Translate a text from a source language to a target language.
+    - Arguments:
+        - `text`: The text to translate. If this is too long, a series of truncated versions are translated, splitting on sentence-delimiters if possible.
+        - `source`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
+        - `target`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
+        - `callback`: Optional. If supplied, must be a function (or be of a callable type) that will be run with `errors` and `value` as its two arguments.
+    - Returns:
+        - String: The translated text. All supplementary data, such as alignments or language detections, are ignored by this function.
+### `GoshenAdapter`
+The `Goshen` class secretly outsources all of its computation to a GoshenAdapter class attribute, which is responsible for performing the machine translation. `GoshenAdapter`s should expose `url` and `translate` functions unambiguously, with the same signatures as those in the `Goshen` class. Other functions may be optionally exposed.
+#### `MosesGoshenAdapter`
+This is one particular implementation of the `GoshenAdapter` type, that uses the `moses-mt-server` backend as its translation engine API endpoint. It splits text into manageable chunks when translating, to avoid crashing the underlying Moses server (RAM allocation failure).
+#### `GoogleTranslateGoshenAdapter`
+This is another implementation of the `GoshenAdapter` type, that uses the Google Translate API as its translation engine endpoint. Because Google handles arbitrarily long text, this adapter does not split text, as `MosesGoshenAdapter`s do.
+For more information, see [this full report](https://github.com/j6k4m8/goshen-moses/blob/master/report/report.md), or contact Jordan Matelsky (@j6k4m8).

mosesdecoder/contrib/goshen-chrome/icon.png ADDED Viewed

mosesdecoder/contrib/goshen-chrome/manifest.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "manifest_version": 2,
+    "name": "Goshen Web Translator",
+    "description": "Translate entire webpages with a casmacat-moses backend",
+    "version": "1.0",
+    "browser_action": {
+        "default_icon": "icon.png",
+        "default_popup": "popup/popup.html"
+    },
+    "permissions": [
+        "activeTab",
+        "storage",
+        "https://ajax.googleapis.com/"
+    ],
+    "options_page" : "options/index.html",
+    "content_scripts": [{
+        "matches": ["http://*/*", "https://*/*", "file:///*"],
+        "css": ["onpage/onpage.css"],
+        "js": [
+            "onpage/onpage.js",
+            "onpage/goshen.js",
+            "onpage/chromegoshen.js"
+        ],
+        "all_frames": true
+    }]
+}

mosesdecoder/contrib/goshen-chrome/style.less ADDED Viewed

	@@ -0,0 +1,22 @@

+* {
+    box-sizing: border-box;
+}
+.container {
+    width: 100%;
+    .dropdown-container, .arrow-container {
+        display: inline-block;
+        width: 40%;
+    }
+    button {
+        border-radius: 0;
+        background: #09f;
+        color: white;
+        text-transform: uppercase;
+        padding: 1em;
+        border: none;
+        cursor: pointer;
+        letter-spacing: 0.1em;
+        font-size: 1.1em;
+    }
+}

mosesdecoder/contrib/lmserver/COPYING ADDED Viewed

	@@ -0,0 +1,33 @@

+Copyright (c) 2009, Chris Dyer
+Portions of this software are
+Copyright (c) 2003, Danga Interactive, Inc.
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of the Danga Interactive nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

mosesdecoder/contrib/lmserver/Makefile.am ADDED Viewed

	@@ -0,0 +1,18 @@

+bin_PROGRAMS = lmserver lmserver-debug
+lmserver_SOURCES = lmserver.c lmserver.h thread.c srilm.cc
+lmserver_debug_SOURCES = $(lmserver_SOURCES)
+lmserver_CPPFLAGS = -DNDEBUG
+lmserver_LDADD = @DAEMON_OBJ@
+lmserver_debug_LDADD = @DAEMON_OBJ@
+lmserver_DEPENDENCIES = @DAEMON_OBJ@
+lmserver_debug_DEPENDENCIES = @DAEMON_OBJ@
+DIST_DIRS = examples
+EXTRA_DIST = examples daemon.c
+dist-hook:
+	rm -rf $(distdir)/doc/.svn/
+	rm -rf $(distdir)/scripts/.svn/
+	rm -rf $(distdir)/t/.svn/
+	rm -rf $(distdir)/t/lib/.svn/

mosesdecoder/contrib/lmserver/NEWS ADDED Viewed

File without changes

mosesdecoder/contrib/lmserver/config.h.in ADDED Viewed

	@@ -0,0 +1,97 @@

+/* config.h.in.  Generated from configure.ac by autoheader.  */
+/* machine is bigendian */
+#undef ENDIAN_BIG
+/* machine is littleendian */
+#undef ENDIAN_LITTLE
+/* Define this if you have daemon() */
+#undef HAVE_DAEMON
+/* Define to 1 if you have the `getpagesizes' function. */
+#undef HAVE_GETPAGESIZES
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+/* do we have malloc.h? */
+#undef HAVE_MALLOC_H
+/* Define to 1 if you have the `memcntl' function. */
+#undef HAVE_MEMCNTL
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+/* Define to 1 if you have the `mlockall' function. */
+#undef HAVE_MLOCKALL
+/* flag for SRILM */
+#undef HAVE_SRILM
+/* Define to 1 if stdbool.h conforms to C99. */
+#undef HAVE_STDBOOL_H
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+/* do we have struct mallinfo? */
+#undef HAVE_STRUCT_MALLINFO
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+/* Define to 1 if the system has the type `_Bool'. */
+#undef HAVE__BOOL
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+/* Name of package */
+#undef PACKAGE
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+/* Define this if you want to use pthreads */
+#undef USE_THREADS
+/* Version number of package */
+#undef VERSION
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+/* define to int if socklen_t not available */
+#undef socklen_t

mosesdecoder/contrib/lmserver/config.status ADDED Viewed

	@@ -0,0 +1,1207 @@

+#! /bin/sh
+# Generated by configure.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=${CONFIG_SHELL-/bin/sh}
+## --------------------- ##
+## M4sh Initialization.  ##
+## --------------------- ##
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+  set -o posix
+fi
+DUALCASE=1; export DUALCASE # for MKS sh
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  as_unset=unset
+else
+  as_unset=false
+fi
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+# NLS nuisances.
+for as_var in \
+  LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+  LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+  LC_TELEPHONE LC_TIME
+do
+  if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+    eval $as_var=C; export $as_var
+  else
+    $as_unset $as_var
+  fi
+done
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)$' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+  	  /^X\/\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\/\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  echo "#! /bin/sh" >conf$$.sh
+  echo  "exit 0"   >>conf$$.sh
+  chmod +x conf$$.sh
+  if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+    PATH_SEPARATOR=';'
+  else
+    PATH_SEPARATOR=:
+  fi
+  rm -f conf$$.sh
+fi
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2"  || {
+  # Find who we are.  Look in the path if we contain no path at all
+  # relative or not.
+  case $0 in
+    *[\\/]* ) as_myself=$0 ;;
+    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+       ;;
+  esac
+  # We did not find ourselves, most probably we were run as `sh COMMAND'
+  # in which case we are not to be found in the path.
+  if test "x$as_myself" = x; then
+    as_myself=$0
+  fi
+  if test ! -f "$as_myself"; then
+    { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+   { (exit 1); exit 1; }; }
+  fi
+  case $CONFIG_SHELL in
+  '')
+    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for as_base in sh bash ksh sh5; do
+	 case $as_dir in
+	 /*)
+	   if ("$as_dir/$as_base" -c '
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x$as_lineno_3"  = "x$as_lineno_2" ') 2>/dev/null; then
+	     $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+	     $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+	     CONFIG_SHELL=$as_dir/$as_base
+	     export CONFIG_SHELL
+	     exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+	   fi;;
+	 esac
+       done
+done
+;;
+  esac
+  # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+  # uniformly replaced by the line number.  The first 'sed' inserts a
+  # line-number line before each line; the second 'sed' does the real
+  # work.  The second script uses 'N' to pair each line-number line
+  # with the numbered line, and appends trailing '-' during
+  # substitution so that $LINENO is not a special case at line end.
+  # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+  # second 'sed' script.  Blame Lee E. McMahon for sed's syntax.  :-)
+  sed '=' <$as_myself |
+    sed '
+      N
+      s,$,-,
+      : loop
+      s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+      t loop
+      s,-$,,
+      s,^['$as_cr_digits']*\n,,
+    ' >$as_me.lineno &&
+  chmod +x $as_me.lineno ||
+    { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+   { (exit 1); exit 1; }; }
+  # Don't try to exec as it changes $[0], causing all sorts of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . ./$as_me.lineno
+  # Exit status is that of the last command.
+  exit
+}
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+  *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T='	' ;;
+  *c*,*  ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+  *)       ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+if expr a : '\(a\)' >/dev/null 2>&1; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+  # We could just check for DJGPP; but this test a) works b) is more generic
+  # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+  if test -f conf$$.exe; then
+    # Don't use ln at all; we don't have any links
+    as_ln_s='cp -p'
+  else
+    as_ln_s='ln -s'
+  fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+  as_ln_s=ln
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p=:
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+as_executable_p="test -f"
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" 	$as_nl"
+# CDPATH.
+$as_unset CDPATH
+exec 6>&1
+# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.  Logging --version etc. is OK.
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+This file was extended by lmserver $as_me 1.0, which was
+generated by GNU Autoconf 2.59.  Invocation command line was
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+config_files=" Makefile"
+config_headers=" config.h"
+config_commands=" depfiles"
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+Usage: $0 [OPTIONS] [FILE]...
+  -h, --help       print this help, then exit
+  -V, --version    print version number, then exit
+  -q, --quiet      do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+  --file=FILE[:TEMPLATE]
+		   instantiate the configuration file FILE
+  --header=FILE[:TEMPLATE]
+		   instantiate the configuration header FILE
+Configuration files:
+$config_files
+Configuration headers:
+$config_headers
+Configuration commands:
+$config_commands
+Report bugs to <bug-autoconf@gnu.org>."
+ac_cs_version="\
+lmserver config.status 1.0
+configured by ./configure, generated by GNU Autoconf 2.59,
+  with options \"'--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC'\"
+Copyright (C) 2003 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=.
+INSTALL="/usr/bin/install -c"
+# If no files are specified by the user, then we need to provide default
+# values.  But we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=*)
+    ac_option=`expr "x$1" : 'x\([^=]*\)='`
+    ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  -*)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  *) # This is not an option, so the user has probably given explicit
+     # arguments.
+     ac_option=$1
+     ac_need_defaults=false;;
+  esac
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --vers* | -V )
+    echo "$ac_cs_version"; exit 0 ;;
+  --he | --h)
+    # Conflict between --help and --header
+    { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+   { (exit 1); exit 1; }; };;
+  --help | --hel | -h )
+    echo "$ac_cs_usage"; exit 0 ;;
+  --debug | --d* | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+    ac_need_defaults=false;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+  # This is an error.
+  -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+   { (exit 1); exit 1; }; } ;;
+  *) ac_config_targets="$ac_config_targets $1" ;;
+  esac
+  shift
+done
+ac_configure_extra_args=
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+if $ac_cs_recheck; then
+  echo "running /bin/sh ./configure " '--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC' $ac_configure_extra_args " --no-create --no-recursion" >&6
+  exec /bin/sh ./configure '--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC' $ac_configure_extra_args --no-create --no-recursion
+fi
+#
+# INIT-COMMANDS section.
+#
+AMDEP_TRUE="" ac_aux_dir="."
+for ac_config_target in $ac_config_targets
+do
+  case "$ac_config_target" in
+  # Handling of arguments.
+  "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+  "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+  "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+  *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+   { (exit 1); exit 1; }; };;
+  esac
+done
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+  trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+  trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+# Three alternatives are tried in order, each under umask 077:
+#   1. mktemp -d with a ./confstatXXXXXX template;
+#   2. mkdir of a name built from the shell PID and $RANDOM;
+#   3. neither worked: report the failure on stderr and exit with status 1.
+# On success $tmp names the directory.
+{
+  tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=./confstat$$-$RANDOM
+  (umask 077 && mkdir $tmp)
+} ||
+{
+   # Prefix the message with $as_me (the script's basename, computed near the
+   # top of this file).  The previous text used $me, which is not assigned
+   # anywhere earlier in this script, so the message lost its program name.
+   echo "$as_me: cannot create a temporary directory in ." >&2
+   { (exit 1); exit 1; }
+}
+#
+# CONFIG_FILES section.
+#
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "$CONFIG_FILES"; then
+  # Protect against being on the right side of a sed subst in config.status.
+  sed 's/,@/@@/; s/@,/@@/; s/,;t t$/@;t t/; /@;t t$/s/[\\&,]/\\&/g;
+   s/@@/,@/; s/@@/@,/; s/@;t t$/,;t t/' >$tmp/subs.sed <<\CEOF
+s,@SHELL@,/bin/sh,;t t
+s,@PATH_SEPARATOR@,:,;t t
+s,@PACKAGE_NAME@,lmserver,;t t
+s,@PACKAGE_TARNAME@,lmserver,;t t
+s,@PACKAGE_VERSION@,1.0,;t t
+s,@PACKAGE_STRING@,lmserver 1.0,;t t
+s,@PACKAGE_BUGREPORT@,,;t t
+s,@exec_prefix@,${prefix},;t t
+s,@prefix@,/usr/local,;t t
+s,@program_transform_name@,s,x,x,,;t t
+s,@bindir@,${exec_prefix}/bin,;t t
+s,@sbindir@,${exec_prefix}/sbin,;t t
+s,@libexecdir@,${exec_prefix}/libexec,;t t
+s,@datadir@,${prefix}/share,;t t
+s,@sysconfdir@,${prefix}/etc,;t t
+s,@sharedstatedir@,${prefix}/com,;t t
+s,@localstatedir@,${prefix}/var,;t t
+s,@libdir@,${exec_prefix}/lib,;t t
+s,@includedir@,${prefix}/include,;t t
+s,@oldincludedir@,/usr/include,;t t
+s,@infodir@,${prefix}/info,;t t
+s,@mandir@,${prefix}/man,;t t
+s,@build_alias@,,;t t
+s,@host_alias@,,;t t
+s,@target_alias@,,;t t
+s,@DEFS@,-DHAVE_CONFIG_H,;t t
+s,@ECHO_C@,,;t t
+s,@ECHO_N@,-n,;t t
+s,@ECHO_T@,,;t t
+s,@LIBS@, -loolm -ldstruct -lmisc -levent,;t t
+s,@build@,x86_64-unknown-linux-gnu,;t t
+s,@build_cpu@,x86_64,;t t
+s,@build_vendor@,unknown,;t t
+s,@build_os@,linux-gnu,;t t
+s,@host@,x86_64-unknown-linux-gnu,;t t
+s,@host_cpu@,x86_64,;t t
+s,@host_vendor@,unknown,;t t
+s,@host_os@,linux-gnu,;t t
+s,@target@,x86_64-unknown-linux-gnu,;t t
+s,@target_cpu@,x86_64,;t t
+s,@target_vendor@,unknown,;t t
+s,@target_os@,linux-gnu,;t t
+s,@INSTALL_PROGRAM@,${INSTALL},;t t
+s,@INSTALL_SCRIPT@,${INSTALL},;t t
+s,@INSTALL_DATA@,${INSTALL} -m 644,;t t
+s,@CYGPATH_W@,echo,;t t
+s,@PACKAGE@,lmserver,;t t
+s,@VERSION@,1.0,;t t
+s,@ACLOCAL@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run aclocal-1.9,;t t
+s,@AUTOCONF@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run autoconf,;t t
+s,@AUTOMAKE@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run automake-1.9,;t t
+s,@AUTOHEADER@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run autoheader,;t t
+s,@MAKEINFO@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run makeinfo,;t t
+s,@install_sh@,/chomes/redpony/moses/moses-arabic/lmserver/install-sh,;t t
+s,@STRIP@,,;t t
+s,@ac_ct_STRIP@,,;t t
+s,@INSTALL_STRIP_PROGRAM@,${SHELL} $(install_sh) -c -s,;t t
+s,@mkdir_p@,mkdir -p --,;t t
+s,@AWK@,gawk,;t t
+s,@SET_MAKE@,,;t t
+s,@am__leading_dot@,.,;t t
+s,@AMTAR@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run tar,;t t
+s,@am__tar@,${AMTAR} chof - "$$tardir",;t t
+s,@am__untar@,${AMTAR} xf -,;t t
+s,@CC@,gcc,;t t
+s,@CFLAGS@,-g -O2,;t t
+s,@LDFLAGS@,-L/fs/clip-software/libevent-1.4.8-stable/lib  -L/fs/clip-software/srilm-1.5.6-PIC/lib/i686,;t t
+s,@CPPFLAGS@,-I/fs/clip-software/libevent-1.4.8-stable/include  -I/fs/clip-software/srilm-1.5.6-PIC/include,;t t
+s,@ac_ct_CC@,gcc,;t t
+s,@EXEEXT@,,;t t
+s,@OBJEXT@,o,;t t
+s,@DEPDIR@,.deps,;t t
+s,@am__include@,include,;t t
+s,@am__quote@,,;t t
+s,@AMDEP_TRUE@,,;t t
+s,@AMDEP_FALSE@,#,;t t
+s,@AMDEPBACKSLASH@,\,;t t
+s,@CCDEPMODE@,depmode=gcc3,;t t
+s,@am__fastdepCC_TRUE@,,;t t
+s,@am__fastdepCC_FALSE@,#,;t t
+s,@CXX@,g++,;t t
+s,@CXXFLAGS@,-g -O2,;t t
+s,@ac_ct_CXX@,g++,;t t
+s,@CXXDEPMODE@,depmode=gcc3,;t t
+s,@am__fastdepCXX_TRUE@,,;t t
+s,@am__fastdepCXX_FALSE@,#,;t t
+s,@CPP@,gcc -E,;t t
+s,@EGREP@,grep -E,;t t
+s,@SRI_LM_TRUE@,,;t t
+s,@SRI_LM_FALSE@,#,;t t
+s,@DAEMON_OBJ@,,;t t
+s,@LIBOBJS@,,;t t
+s,@LTLIBOBJS@,,;t t
+CEOF
+  # Split the substitutions in $tmp/subs.sed into fragments of at most
+  # $ac_max_sed_lines lines each, for seds with small command number limits, like on Digital OSF/1 and HP-UX.
+  ac_max_sed_lines=48
+  ac_sed_frag=1 # Index of the current fragment; used in the name subs-N.sed.
+  ac_beg=1 # Lines 1..$ac_beg of subs.sed are deleted for the current fragment (nothing is deleted while this is 1).
+  ac_end=$ac_max_sed_lines # Last line of subs.sed copied into the current fragment (sed quits after printing it).
+  ac_more_lines=:
+  ac_sed_cmds=
+  while $ac_more_lines; do
+    if test $ac_beg -gt 1; then
+      sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+    else
+      sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+    fi
+    if test ! -s $tmp/subs.frag; then
+      ac_more_lines=false
+    else
+      # Each fragment is prefixed with a `:t' label and a branch that ends
+      # the script early for input lines containing no @NAME@ token, so sed
+      # does not have to try every substitution on such lines.  The fragment
+      # is then appended to the pipeline accumulated in $ac_sed_cmds.
+      (echo ':t
+  /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+      if test -z "$ac_sed_cmds"; then
+	ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+      else
+	ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+      fi
+      ac_sed_frag=`expr $ac_sed_frag + 1`
+      ac_beg=$ac_end
+      ac_end=`expr $ac_end + $ac_max_sed_lines`
+    fi
+  done
+  if test -z "$ac_sed_cmds"; then
+    ac_sed_cmds=cat
+  fi
+fi # test -n "$CONFIG_FILES"
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+  case $ac_file in
+  - | *:- | *:-:* ) # input from stdin
+	cat >$tmp/stdin
+	ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  * )   ac_file_in=$ac_file.in ;;
+  esac
+  # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+  ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+  { if $as_mkdir_p; then
+    mkdir -p "$ac_dir"
+  else
+    as_dir="$ac_dir"
+    as_dirs=
+    while test ! -d "$as_dir"; do
+      as_dirs="$as_dir $as_dirs"
+      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    done
+    test ! -n "$as_dirs" || mkdir $as_dirs
+  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+   { (exit 1); exit 1; }; }; }
+  ac_builddir=.
+if test "$ac_dir" != .; then
+  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+  # A "../" for each directory in $ac_dir_suffix.
+  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+  ac_dir_suffix= ac_top_builddir=
+fi
+case $srcdir in
+  .)  # No --srcdir option.  We are building in place.
+    ac_srcdir=.
+    if test -z "$ac_top_builddir"; then
+       ac_top_srcdir=.
+    else
+       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+    fi ;;
+  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir ;;
+  *) # Relative path.
+    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+  case "$ac_dir" in
+  .) ac_abs_builddir=`pwd`;;
+  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+  *) ac_abs_builddir=`pwd`/"$ac_dir";;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+  case ${ac_top_builddir}. in
+  .) ac_abs_top_builddir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+  case $ac_srcdir in
+  .) ac_abs_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+  case $ac_top_srcdir in
+  .) ac_abs_top_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+  esac;;
+esac
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_builddir$INSTALL ;;
+  esac
+  if test x"$ac_file" != x-; then
+    { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+    rm -f "$ac_file"
+  fi
+  # Let's still pretend it is `configure' which instantiates (i.e., don't
+  # use $as_me), people would be surprised to read:
+  #    /* config.h.  Generated by config.status.  */
+  if test x"$ac_file" = x-; then
+    configure_input=
+  else
+    configure_input="$ac_file.  "
+  fi
+  configure_input=$configure_input"Generated from `echo $ac_file_in |
+				     sed 's,.*/,,'` by configure."
+  # First look for the input files in the build tree, otherwise in the
+  # src tree.
+  ac_file_inputs=`IFS=:
+    for f in $ac_file_in; do
+      case $f in
+      -) echo $tmp/stdin ;;
+      [\\/$]*)
+	 # Absolute (can't be DOS-style, as IFS=:)
+	 test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 echo "$f";;
+      *) # Relative
+	 if test -f "$f"; then
+	   # Build tree
+	   echo "$f"
+	 elif test -f "$srcdir/$f"; then
+	   # Source tree
+	   echo "$srcdir/$f"
+	 else
+	   # /dev/null tree
+	   { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 fi;;
+      esac
+    done` || { (exit 1); exit 1; }
+  sed "/^[	 ]*VPATH[	 ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[	 ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[	 ]*$//;
+}
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s,@configure_input@,$configure_input,;t t
+s,@srcdir@,$ac_srcdir,;t t
+s,@abs_srcdir@,$ac_abs_srcdir,;t t
+s,@top_srcdir@,$ac_top_srcdir,;t t
+s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s,@builddir@,$ac_builddir,;t t
+s,@abs_builddir@,$ac_abs_builddir,;t t
+s,@top_builddir@,$ac_top_builddir,;t t
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
+s,@INSTALL@,$ac_INSTALL,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+  rm -f $tmp/stdin
+  if test x"$ac_file" != x-; then
+    mv $tmp/out $ac_file
+  else
+    cat $tmp/out
+    rm -f $tmp/out
+  fi
+done
+#
+# CONFIG_HEADER section.
+#
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s,^\([	 ]*\)#\([	 ]*define[	 ][	 ]*\)'
+ac_dB='[	 ].*$,\1#\2'
+ac_dC=' '
+ac_dD=',;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s,^\([	 ]*\)#\([	 ]*\)undef\([	 ][	 ]*\)'
+ac_uB='$,\1#\2define\3'
+ac_uC=' '
+ac_uD=',;t'
+for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
+  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+  case $ac_file in
+  - | *:- | *:-:* ) # input from stdin
+	cat >$tmp/stdin
+	ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+	ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+  * )   ac_file_in=$ac_file.in ;;
+  esac
+  test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+  # First look for the input files in the build tree, otherwise in the
+  # src tree.
+  ac_file_inputs=`IFS=:
+    for f in $ac_file_in; do
+      case $f in
+      -) echo $tmp/stdin ;;
+      [\\/$]*)
+	 # Absolute (can't be DOS-style, as IFS=:)
+	 test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 # Do quote $f, to prevent DOS paths from being IFS'd.
+	 echo "$f";;
+      *) # Relative
+	 if test -f "$f"; then
+	   # Build tree
+	   echo "$f"
+	 elif test -f "$srcdir/$f"; then
+	   # Source tree
+	   echo "$srcdir/$f"
+	 else
+	   # /dev/null tree
+	   { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+   { (exit 1); exit 1; }; }
+	 fi;;
+      esac
+    done` || { (exit 1); exit 1; }
+  # Remove the trailing spaces.
+  sed 's/[	 ]*$//' $ac_file_inputs >$tmp/in
+  # Handle all the #define templates only if necessary.
+  if grep "^[	 ]*#[	 ]*define" $tmp/in >/dev/null; then
+  # If there are no defines, we may have an empty if/fi
+  :
+  cat >$tmp/defines.sed <<CEOF
+/^[	 ]*#[	 ]*define/!b
+t clr
+: clr
+${ac_dA}PACKAGE_NAME${ac_dB}PACKAGE_NAME${ac_dC}"lmserver"${ac_dD}
+${ac_dA}PACKAGE_TARNAME${ac_dB}PACKAGE_TARNAME${ac_dC}"lmserver"${ac_dD}
+${ac_dA}PACKAGE_VERSION${ac_dB}PACKAGE_VERSION${ac_dC}"1.0"${ac_dD}
+${ac_dA}PACKAGE_STRING${ac_dB}PACKAGE_STRING${ac_dC}"lmserver 1.0"${ac_dD}
+${ac_dA}PACKAGE_BUGREPORT${ac_dB}PACKAGE_BUGREPORT${ac_dC}""${ac_dD}
+${ac_dA}PACKAGE${ac_dB}PACKAGE${ac_dC}"lmserver"${ac_dD}
+${ac_dA}VERSION${ac_dB}VERSION${ac_dC}"1.0"${ac_dD}
+${ac_dA}STDC_HEADERS${ac_dB}STDC_HEADERS${ac_dC}1${ac_dD}
+${ac_dA}HAVE_SYS_TYPES_H${ac_dB}HAVE_SYS_TYPES_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_SYS_STAT_H${ac_dB}HAVE_SYS_STAT_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_STDLIB_H${ac_dB}HAVE_STDLIB_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_STRING_H${ac_dB}HAVE_STRING_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_MEMORY_H${ac_dB}HAVE_MEMORY_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_STRINGS_H${ac_dB}HAVE_STRINGS_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_INTTYPES_H${ac_dB}HAVE_INTTYPES_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_STDINT_H${ac_dB}HAVE_STDINT_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_UNISTD_H${ac_dB}HAVE_UNISTD_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_SRILM${ac_dB}HAVE_SRILM${ac_dC}${ac_dD}
+${ac_dA}HAVE_DAEMON${ac_dB}HAVE_DAEMON${ac_dC}${ac_dD}
+${ac_dA}HAVE__BOOL${ac_dB}HAVE__BOOL${ac_dC}1${ac_dD}
+${ac_dA}HAVE_STDBOOL_H${ac_dB}HAVE_STDBOOL_H${ac_dC}1${ac_dD}
+${ac_dA}HAVE_MALLOC_H${ac_dB}HAVE_MALLOC_H${ac_dC}${ac_dD}
+${ac_dA}HAVE_STRUCT_MALLINFO${ac_dB}HAVE_STRUCT_MALLINFO${ac_dC}${ac_dD}
+${ac_dA}ENDIAN_LITTLE${ac_dB}ENDIAN_LITTLE${ac_dC}1${ac_dD}
+${ac_dA}HAVE_MLOCKALL${ac_dB}HAVE_MLOCKALL${ac_dC}1${ac_dD}
+CEOF
+  sed -f $tmp/defines.sed $tmp/in >$tmp/out
+  rm -f $tmp/in
+  mv $tmp/out $tmp/in
+  fi # grep
+  # Handle all the #undef templates
+  cat >$tmp/undefs.sed <<CEOF
+/^[	 ]*#[	 ]*undef/!b
+t clr
+: clr
+${ac_uA}PACKAGE_NAME${ac_uB}PACKAGE_NAME${ac_uC}"lmserver"${ac_uD}
+${ac_uA}PACKAGE_TARNAME${ac_uB}PACKAGE_TARNAME${ac_uC}"lmserver"${ac_uD}
+${ac_uA}PACKAGE_VERSION${ac_uB}PACKAGE_VERSION${ac_uC}"1.0"${ac_uD}
+${ac_uA}PACKAGE_STRING${ac_uB}PACKAGE_STRING${ac_uC}"lmserver 1.0"${ac_uD}
+${ac_uA}PACKAGE_BUGREPORT${ac_uB}PACKAGE_BUGREPORT${ac_uC}""${ac_uD}
+${ac_uA}PACKAGE${ac_uB}PACKAGE${ac_uC}"lmserver"${ac_uD}
+${ac_uA}VERSION${ac_uB}VERSION${ac_uC}"1.0"${ac_uD}
+${ac_uA}STDC_HEADERS${ac_uB}STDC_HEADERS${ac_uC}1${ac_uD}
+${ac_uA}HAVE_SYS_TYPES_H${ac_uB}HAVE_SYS_TYPES_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_SYS_STAT_H${ac_uB}HAVE_SYS_STAT_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_STDLIB_H${ac_uB}HAVE_STDLIB_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_STRING_H${ac_uB}HAVE_STRING_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_MEMORY_H${ac_uB}HAVE_MEMORY_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_STRINGS_H${ac_uB}HAVE_STRINGS_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_INTTYPES_H${ac_uB}HAVE_INTTYPES_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_STDINT_H${ac_uB}HAVE_STDINT_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_UNISTD_H${ac_uB}HAVE_UNISTD_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_SRILM${ac_uB}HAVE_SRILM${ac_uC}${ac_uD}
+${ac_uA}HAVE_DAEMON${ac_uB}HAVE_DAEMON${ac_uC}${ac_uD}
+${ac_uA}HAVE__BOOL${ac_uB}HAVE__BOOL${ac_uC}1${ac_uD}
+${ac_uA}HAVE_STDBOOL_H${ac_uB}HAVE_STDBOOL_H${ac_uC}1${ac_uD}
+${ac_uA}HAVE_MALLOC_H${ac_uB}HAVE_MALLOC_H${ac_uC}${ac_uD}
+${ac_uA}HAVE_STRUCT_MALLINFO${ac_uB}HAVE_STRUCT_MALLINFO${ac_uC}${ac_uD}
+${ac_uA}ENDIAN_LITTLE${ac_uB}ENDIAN_LITTLE${ac_uC}1${ac_uD}
+${ac_uA}HAVE_MLOCKALL${ac_uB}HAVE_MLOCKALL${ac_uC}1${ac_uD}
+s,^[	 ]*#[	 ]*undef[	 ][	 ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+CEOF
+  sed -f $tmp/undefs.sed $tmp/in >$tmp/out
+  rm -f $tmp/in
+  mv $tmp/out $tmp/in
+  # Let's still pretend it is `configure' which instantiates (i.e., don't
+  # use $as_me), people would be surprised to read:
+  #    /* config.h.  Generated by config.status.  */
+  if test x"$ac_file" = x-; then
+    echo "/* Generated by configure.  */" >$tmp/config.h
+  else
+    echo "/* $ac_file.  Generated by configure.  */" >$tmp/config.h
+  fi
+  cat $tmp/in >>$tmp/config.h
+  rm -f $tmp/in
+  if test x"$ac_file" != x-; then
+    if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
+      { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+    else
+      ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+      { if $as_mkdir_p; then
+    mkdir -p "$ac_dir"
+  else
+    as_dir="$ac_dir"
+    as_dirs=
+    while test ! -d "$as_dir"; do
+      as_dirs="$as_dir $as_dirs"
+      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    done
+    test ! -n "$as_dirs" || mkdir $as_dirs
+  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+   { (exit 1); exit 1; }; }; }
+      rm -f $ac_file
+      mv $tmp/config.h $ac_file
+    fi
+  else
+    cat $tmp/config.h
+    rm -f $tmp/config.h
+  fi
+# Compute $ac_file's index in $config_headers.
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $ac_file | $ac_file:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $ac_file" >`(dirname $ac_file) 2>/dev/null ||
+$as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X$ac_file : 'X\(//\)[^/]' \| \
+	 X$ac_file : 'X\(//\)$' \| \
+	 X$ac_file : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X$ac_file |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`/stamp-h$_am_stamp_count
+done
+#
+# CONFIG_COMMANDS section.
+#
+for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue
+  ac_dest=`echo "$ac_file" | sed 's,:.*,,'`
+  ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'`
+  ac_dir=`(dirname "$ac_dest") 2>/dev/null ||
+$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_dest" : 'X\(//\)[^/]' \| \
+	 X"$ac_dest" : 'X\(//\)$' \| \
+	 X"$ac_dest" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$ac_dest" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+  { if $as_mkdir_p; then
+    mkdir -p "$ac_dir"
+  else
+    as_dir="$ac_dir"
+    as_dirs=
+    while test ! -d "$as_dir"; do
+      as_dirs="$as_dir $as_dirs"
+      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    done
+    test ! -n "$as_dirs" || mkdir $as_dirs
+  fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+   { (exit 1); exit 1; }; }; }
+  ac_builddir=.
+if test "$ac_dir" != .; then
+  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+  # A "../" for each directory in $ac_dir_suffix.
+  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+  ac_dir_suffix= ac_top_builddir=
+fi
+case $srcdir in
+  .)  # No --srcdir option.  We are building in place.
+    ac_srcdir=.
+    if test -z "$ac_top_builddir"; then
+       ac_top_srcdir=.
+    else
+       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+    fi ;;
+  [\\/]* | ?:[\\/]* )  # Absolute path.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir ;;
+  *) # Relative path.
+    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+  case "$ac_dir" in
+  .) ac_abs_builddir=`pwd`;;
+  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+  *) ac_abs_builddir=`pwd`/"$ac_dir";;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+  case ${ac_top_builddir}. in
+  .) ac_abs_top_builddir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+  case $ac_srcdir in
+  .) ac_abs_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+  case $ac_top_srcdir in
+  .) ac_abs_top_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+  esac;;
+esac
+  { echo "$as_me:$LINENO: executing $ac_dest commands" >&5
+echo "$as_me: executing $ac_dest commands" >&6;}
+  case $ac_dest in
+    depfiles ) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do
+  # Strip MF so we end up with the name of the file.
+  mf=`echo "$mf" | sed -e 's/:.*$//'`
+  # Check whether this is an Automake generated Makefile or not.
+  # We used to match only the files named `Makefile.in', but
+  # some people rename them; so instead we look at the file content.
+  # Grep'ing the first line is not enough: some people post-process
+  # each Makefile.in and add a new line on top of each file to say so.
+  # So let's grep whole file.
+  if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then
+    dirpart=`(dirname "$mf") 2>/dev/null ||
+$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$mf" : 'X\(//\)[^/]' \| \
+	 X"$mf" : 'X\(//\)$' \| \
+	 X"$mf" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+  else
+    continue
+  fi
+  # Extract the definitions of DEPDIR, am__include, and am__quote
+  # from the Makefile without running `make'.  The Makefile is skipped
+  # when DEPDIR or am__include is not defined in it.
+  DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
+  test -z "$DEPDIR" && continue
+  am__include=`sed -n 's/^am__include = //p' < "$mf"`
+  # Test the variable's value: the bare word "am__include" is never empty,
+  # so testing it could never skip a Makefile.
+  test -z "$am__include" && continue
+  am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
+  # When using ansi2knr, U may be empty or an underscore; expand it
+  U=`sed -n 's/^U = //p' < "$mf"`
+  # Find all dependency output files, they are included files with
+  # $(DEPDIR) in their names.  We invoke sed twice because it is the
+  # simplest approach to changing $(DEPDIR) to its actual value in the
+  # expansion.
+  for file in `sed -n "
+    s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
+       sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
+    # Make sure the directory exists.
+    test -f "$dirpart/$file" && continue
+    fdir=`(dirname "$file") 2>/dev/null ||
+$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$file" : 'X\(//\)[^/]' \| \
+	 X"$file" : 'X\(//\)$' \| \
+	 X"$file" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    { if $as_mkdir_p; then
+    mkdir -p $dirpart/$fdir
+  else
+    as_dir=$dirpart/$fdir
+    as_dirs=
+    while test ! -d "$as_dir"; do
+      as_dirs="$as_dir $as_dirs"
+      as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| \
+	 .     : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+  	  /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+  	  /^X\(\/\/\)$/{ s//\1/; q; }
+  	  /^X\(\/\).*/{ s//\1/; q; }
+  	  s/.*/./; q'`
+    done
+    test ! -n "$as_dirs" || mkdir $as_dirs
+  fi || { { echo "$as_me:$LINENO: error: cannot create directory $dirpart/$fdir" >&5
+echo "$as_me: error: cannot create directory $dirpart/$fdir" >&2;}
+   { (exit 1); exit 1; }; }; }
+    # echo "creating $dirpart/$file"
+    echo '# dummy' > "$dirpart/$file"
+  done
+done
+ ;;
+  esac
+done
+{ (exit 0); exit 0; }

mosesdecoder/contrib/lmserver/config.sub ADDED Viewed

	@@ -0,0 +1,1676 @@

+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+#   Free Software Foundation, Inc.
+timestamp='2008-01-16'
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine.  It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+me=`echo "$0" | sed -e 's,.*/,,'`
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+Canonicalize a configuration name.
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+Report bugs and patches to <config-patches@gnu.org>."
+version="\
+GNU config.sub ($timestamp)
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+help="
+Try \`$me --help' for more information."
+# Parse command line.  Options are consumed until the first non-option
+# argument; the informational options print their text and exit at once.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       # Errors are reported on stderr, as documented in the header of
+       # this script.
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    *local*)
+       # First pass through any local machine types.  The argument is
+       # quoted so it is echoed verbatim (no word splitting or globbing).
+       echo "$1"
+       exit ;;
+    * )
+       break ;;
+  esac
+done
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+# The argument is quoted wherever it is expanded so that whitespace or
+# glob characters in it are not split or expanded by the shell, and so
+# that `[' always receives exactly three operands.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
+  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    # The last two dash-separated fields form a known KERNEL-OS pair.
+    os=-$maybe_os
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  *)
+    # Otherwise only the last dash-separated field, if there is one, is
+    # taken as the OS; with no dash at all, os is left empty.
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+    if [ "$basic_machine" != "$1" ]
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis | -knuth | -cray)
+		os=
+		basic_machine=$1
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=
+		basic_machine=$1
+		;;
+	-scout)
+		;;
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos
+		basic_machine=$1
+		;;
+ 	-chorusrdb)
+ 		os=-chorusrdb
+		basic_machine=$1
+ 		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)
+		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco5v6*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)
+		os=-isc2.2
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*)
+		os=-lynxos
+		;;
+	-ptx*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+		;;
+	-windowsnt*)
+		os=`echo $os | sed -e 's/windowsnt/winnt/'`
+		;;
+	-psos*)
+		os=-psos
+		;;
+	-mint | -mint[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+	| bfin \
+	| c4x | clipper \
+	| d10v | d30v | dlx | dsp16xx | dvp \
+	| fido | fr30 | frv \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| i370 | i860 | i960 | ia64 \
+	| ip2k | iq2000 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | mcore | mep \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64vr | mips64vrel \
+	| mips64orion | mips64orionel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| mt \
+	| msp430 \
+	| nios | nios2 \
+	| ns16k | ns32k \
+	| or32 \
+	| pdp10 | pdp11 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| pyramid \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]a*eb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu | strongarm \
+	| tahoe | thumb | tic4x | tic80 | tron \
+	| v850 | v850e \
+	| we32k \
+	| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
+	| z8k)
+		basic_machine=$basic_machine-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12)
+		# Motorola 68HC11/12.
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* | avr32-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+	| clipper-* | craynv-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| elxsi-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| i*86-* | i860-* | i960-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| m32c-* | m32r-* | m32rle-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
+	| msp430-* \
+	| nios-* | nios2-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| pyramid-* \
+	| romp-* | rs6000-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]a*eb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
+	| tahoe-* | thumb-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tron-* \
+	| v850-* | v850e-* | vax-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
+	| xstormy16-* | xtensa*-* \
+	| ymp-* \
+	| z8k-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-unknown
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+    	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16)
+		basic_machine=cr16-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2* | dpx2*-bull)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppa-next)
+		os=-nextstep3
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
+	i*86v32)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	i386-vsta | vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	m88k-omron*)
+		basic_machine=m88k-omron
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	mingw32)
+		basic_machine=i386-pc
+		os=-mingw32
+		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mipsEE* | ee | ps2)
+		basic_machine=mips64r5900el-scei
+		case $os in
+		    -linux*)
+			;;
+		    *)
+			os=-elf
+			;;
+		esac
+		;;
+	iop)
+		basic_machine=mipsel-scei
+		os=-irx
+		;;
+	dvp)
+		basic_machine=dvp-scei
+		os=-elf
+		;;
+	mips3*-*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	ms1-*)
+		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next )
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	openrisc | openrisc-*)
+		basic_machine=or32-unknown
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc)	basic_machine=powerpc-unknown
+		;;
+	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle | ppc-le | powerpc-little)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rdos)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh)
+		basic_machine=sh-hitachi
+		os=-hms
+		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
+	sh64)
+		basic_machine=sh64-unknown
+		;;
+	sparclite-wrs | simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tic54x | c54x*)
+		basic_machine=tic54x-unknown
+		os=-coff
+		;;
+	tic55x | c55x*)
+		basic_machine=tic55x-unknown
+		os=-coff
+		;;
+	tic6x | c6x*)
+		basic_machine=tic6x-unknown
+		os=-coff
+		;;
+	tile*)
+		basic_machine=tile-unknown
+		os=-linux-gnu
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	z8k-*-coff)
+		basic_machine=z8k-unknown
+		os=-sim
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+# Here we handle the default manufacturer of certain CPU types.  It is in
+# some cases the only manufacturer, in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp10)
+		# there are many clones, so DEC is not a safe bet
+		basic_machine=pdp10-unknown
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
+		basic_machine=sparc-sun
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+esac
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in
+	*-digital*)
+		basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)
+		basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+		;;
+	*)
+		;;
+esac
+# Decode manufacturer-specific aliases for certain operating systems.
+if [ x"$os" != x"" ]
+then
+case $os in
+        # First match some system type aliases
+        # that might get confused with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-solaris1 | -solaris1.*)
+		os=`echo $os | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# First accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST END IN A *, to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -openbsd* | -solidbsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+	      | -chorusos* | -chorusrdb* \
+	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -irx*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo $os | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)
+		os=`echo $os | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)
+		os=`echo $os | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo $os | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo $os | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+        -os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-osfrose*)
+		os=-osfrose
+		;;
+	-osf*)
+		os=-osf
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2 )
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo $os | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+        -tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-es1800*)
+		os=-ose
+		;;
+	-xenix)
+		os=-xenix
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-aros*)
+		os=-aros
+		;;
+	-kaos*)
+		os=-kaos
+		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo $os | sed 's/[^-]*-//'`
+		echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or put another way, the most popular os provided with the machine.
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+case $basic_machine in
+        score-*)
+		os=-elf
+		;;
+        spu-*)
+		os=-elf
+		;;
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+        c4x-* | tic4x-*)
+        	os=-coff
+		;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		# This also exists in the configure program, but was not the
+		# default.
+		# os=-sunos4
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+        mep-*)
+		os=-elf
+		;;
+	mips*-cisco)
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-haiku)
+		os=-haiku
+		;;
+	*-ibm)
+		os=-aix
+		;;
+    	*-knuth)
+		os=-mmixware
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next )
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-next)
+		os=-nextstep3
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)
+		os=-none
+		;;
+esac
+fi
+# The CPU type and the OS are known at this point, but the manufacturer may
+# still be the placeholder "unknown".  Where the OS implies an obvious
+# manufacturer, substitute it; otherwise "unknown" is kept.
+vendor=unknown
+case $basic_machine in
+	*-unknown)
+		case $os in
+			-riscix*)	vendor=acorn ;;
+			-sunos*)	vendor=sun ;;
+			-aix* | -mvs* | -opened* | -os400* | -tpf*)	vendor=ibm ;;
+			-beos*)	vendor=be ;;
+			-hpux* | -mpeix*)	vendor=hp ;;
+			-hiux* | -hms*)	vendor=hitachi ;;
+			-unos*)	vendor=crds ;;
+			-dgux*)	vendor=dg ;;
+			-luna*)	vendor=omron ;;
+			-genix*)	vendor=ns ;;
+			-ptx*)	vendor=sequent ;;
+			-vxsim* | -vxworks* | -windiss*)	vendor=wrs ;;
+			-aux* | -mpw* | -macos*)	vendor=apple ;;
+			# The suffix-style mint patterns must stay after every prefix
+			# pattern above and before -vos*, so that first-match order
+			# is the same as in the long-hand form of this table.
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)	vendor=atari ;;
+			-vos*)	vendor=stratus ;;
+		esac
+		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+		;;
+esac
+# Emit the canonical CPU-VENDOR-OS triplet ($os carries its own leading dash).
+echo $basic_machine$os
+exit
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:

mosesdecoder/contrib/lmserver/configure.ac ADDED Viewed

	@@ -0,0 +1,235 @@

+dnl Bootstrap: package identity, canonical system detection, the generated
+dnl config header, and the compilers/installer the build needs.
+AC_PREREQ(2.52)
+AC_INIT(lmserver, 1.0)
+AC_CANONICAL_SYSTEM
+AC_CONFIG_SRCDIR(lmserver.c)
+AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
+dnl AC_CONFIG_HEADERS is the supported spelling; the old AM_CONFIG_HEADER
+dnl alias was removed in Automake 1.13 and aborts autoreconf there.
+AC_CONFIG_HEADERS([config.h])
+AC_PROG_CC
+AC_PROG_CXX
+AM_PROG_CC_C_O
+AC_PROG_INSTALL
+dnl --with-srilm=PATH selects an SRILM installation; the default is "no".
+AC_ARG_WITH(srilm,
+        [AS_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
+	[with_srilm=$withval],
+	[with_srilm=no]
+)
+dnl --enable-64bit: build with -m64, after verifying that the compiler really
+dnl produces a runnable program with 8-byte pointers under that flag.
+AC_ARG_ENABLE(64bit,
+  [AS_HELP_STRING([--enable-64bit],[build 64bit version])])
+dnl Compare with the POSIX "=" operator; "==" is a bash extension that makes
+dnl test fail under strict /bin/sh implementations such as dash.
+if test "x$enable_64bit" = "xyes"
+then
+    org_cflags=$CFLAGS
+    CFLAGS=-m64
+    AC_RUN_IFELSE(
+      [AC_LANG_PROGRAM([], [dnl
+return sizeof(void*) == 8 ? 0 : 1;
+      ])
+    ],[
+      CFLAGS="-m64 $org_cflags"
+    ],[
+    AC_MSG_ERROR([Don't know how to build a 64-bit object.])
+    ])
+fi
+dnl --with-libevent=PATH names an extra directory to probe for libevent.
+dnl Any value other than "no" is remembered in trylibeventdir.
+trylibeventdir=""
+AC_ARG_WITH([libevent],
+       [  --with-libevent=PATH     Specify path to libevent installation ],
+       [test "x$withval" = "xno" || trylibeventdir=$withval])
+dnl Optional SRILM support: when --with-srilm=PATH is given, require Prob.h
+dnl under PATH/include and link against the SRILM static libraries found in
+dnl PATH/lib/<machine-type>.
+if test "x$with_srilm" != 'xno'
+then
+  SAVE_CPPFLAGS="$CPPFLAGS"
+  CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
+  AC_CHECK_HEADER(Prob.h,
+                 [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
+                 [AC_MSG_ERROR([Cannot find SRILM!])])
+  LIB_SRILM="-loolm -ldstruct -lmisc"
+  # ROOT/lib/i686-m64/liboolm.a
+  # ROOT/lib/i686-m64/libdstruct.a
+  # ROOT/lib/i686-m64/libmisc.a
+  # SRILM ships a helper script that prints its per-architecture lib subdir.
+  MY_ARCH=`${with_srilm}/sbin/machine-type`
+  LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
+  LIBS="$LIBS $LIB_SRILM"
+  FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
+fi
+dnl AM_CONDITIONAL has to be expanded on every configure run, not only inside
+dnl the shell branch above; otherwise SRI_LM is left undefined whenever SRILM
+dnl is disabled and configure stops with "conditional was never defined".
+AM_CONDITIONAL([SRI_LM], [test "x$with_srilm" != 'xno'])
+dnl ------------------------------------------------------
+dnl libevent detection.  swiped from Tor.  modified a bit.
+dnl Probes, in order, the --with-libevent directory, the default search path,
+dnl $prefix and /usr/local by trying to link a call to event_init.  The first
+dnl location that links is cached in ac_cv_libevent_dir, with the marker
+dnl "(system)" meaning that no extra -L/-I flags are needed.
+LIBEVENT_URL=http://www.monkey.org/~provos/libevent/
+AC_CACHE_CHECK([for libevent directory], ac_cv_libevent_dir, [
+  saved_LIBS="$LIBS"
+  saved_LDFLAGS="$LDFLAGS"
+  saved_CPPFLAGS="$CPPFLAGS"
+  le_found=no
+  for ledir in $trylibeventdir "" $prefix /usr/local ; do
+    # Start every candidate from the caller's flags so that the -L/-I paths of
+    # a rejected directory cannot influence the probe of the next one.
+    LDFLAGS="$saved_LDFLAGS"
+    CPPFLAGS="$saved_CPPFLAGS"
+    LIBS="$saved_LIBS -levent"
+    # Skip the directory if it isn't there.
+    if test -n "$ledir" && test ! -d "$ledir" ; then
+       continue;
+    fi
+    if test -n "$ledir" ; then
+      # Prefer the conventional lib/ and include/ subdirectories, falling back
+      # to the directory itself for flat installations.
+      if test -d "$ledir/lib" ; then
+        LDFLAGS="-L$ledir/lib $LDFLAGS"
+      else
+        LDFLAGS="-L$ledir $LDFLAGS"
+      fi
+      if test -d "$ledir/include" ; then
+        CPPFLAGS="-I$ledir/include $CPPFLAGS"
+      else
+        CPPFLAGS="-I$ledir $CPPFLAGS"
+      fi
+    fi
+    # Can I compile and link it?
+    AC_TRY_LINK([#include <sys/time.h>
+#include <sys/types.h>
+#include <event.h>], [ event_init(); ],
+       [ libevent_linked=yes ], [ libevent_linked=no ])
+    if test "$libevent_linked" = yes; then
+       if test -n "$ledir" ; then
+         ac_cv_libevent_dir=$ledir
+       else
+         ac_cv_libevent_dir="(system)"
+       fi
+       le_found=yes
+       break
+    fi
+  done
+  # The probe only decides where libevent lives; restore the original flags
+  # and let the code after the cache check add the final ones.
+  LIBS="$saved_LIBS"
+  LDFLAGS="$saved_LDFLAGS"
+  CPPFLAGS="$saved_CPPFLAGS"
+  if test "$le_found" = no ; then
+    AC_MSG_ERROR([libevent is required.  You can get it from $LIBEVENT_URL
+      If it's already installed, specify its path using --with-libevent=/dir/
+])
+  fi
+])
+LIBS="$LIBS -levent"
+dnl Apply the flags for the selected location; the value is quoted so that a
+dnl path containing whitespace does not break the test.
+if test "$ac_cv_libevent_dir" != "(system)"; then
+  if test -d "$ac_cv_libevent_dir/lib" ; then
+    LDFLAGS="-L$ac_cv_libevent_dir/lib $LDFLAGS"
+    le_libdir="$ac_cv_libevent_dir/lib"
+  else
+    LDFLAGS="-L$ac_cv_libevent_dir $LDFLAGS"
+    le_libdir="$ac_cv_libevent_dir"
+  fi
+  if test -d "$ac_cv_libevent_dir/include" ; then
+    CPPFLAGS="-I$ac_cv_libevent_dir/include $CPPFLAGS"
+  else
+    CPPFLAGS="-I$ac_cv_libevent_dir $CPPFLAGS"
+  fi
+fi
+dnl ----------------------------------------------------------------------------
+dnl Libraries and features that differ between platforms: socket and resolver
+dnl libraries, mallinfo, daemon, stdbool, const, and malloc.h.
+AC_SEARCH_LIBS(socket, socket)
+AC_SEARCH_LIBS(gethostbyname, nsl)
+AC_SEARCH_LIBS(mallinfo, malloc)
+dnl When the C library lacks daemon, DAEMON_OBJ names a replacement object
+dnl that is exported to the Makefile through AC_SUBST.
+AC_CHECK_FUNC([daemon],
+  [AC_DEFINE([HAVE_DAEMON],,[Define this if you have daemon()])],
+  [DAEMON_OBJ=daemon.o])
+AC_SUBST(DAEMON_OBJ)
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_CHECK_HEADER([malloc.h], [AC_DEFINE(HAVE_MALLOC_H,,[do we have malloc.h?])])
+AC_CHECK_MEMBER([struct mallinfo.arena], [
+        AC_DEFINE(HAVE_STRUCT_MALLINFO,,[do we have struct mallinfo?])
+    ], ,[
+#    include <malloc.h>
+    ]
+)
+dnl From licq: Copyright (c) 2000 Dirk Mueller
+dnl Check if the type socklen_t is defined anywhere
+dnl The probe compiles a declaration of a socklen_t variable after including
+dnl sys/types.h and sys/socket.h, and caches the outcome in ac_cv_c_socklen_t.
+dnl When the type is missing, socklen_t is AC_DEFINEd as int.
+AC_DEFUN([AC_C_SOCKLEN_T],
+[AC_CACHE_CHECK(for socklen_t, ac_cv_c_socklen_t,
+[
+  AC_TRY_COMPILE([
+    #include <sys/types.h>
+    #include <sys/socket.h>
+  ],[
+    socklen_t foo;
+  ],[
+    ac_cv_c_socklen_t=yes
+  ],[
+    ac_cv_c_socklen_t=no
+  ])
+])
+if test $ac_cv_c_socklen_t = no; then
+  AC_DEFINE(socklen_t, int, [define to int if socklen_t not available])
+fi
+])
+AC_C_SOCKLEN_T
+dnl Check if we're a little-endian or a big-endian system, needed by hash code
+dnl The test program stores 1 in a long and inspects its first byte.  It
+dnl returns non-zero (test "fails") when that byte is 1, i.e. on a
+dnl little-endian machine, and zero (test "succeeds") on a big-endian one.
+dnl The result is cached in ac_cv_c_endian and exported as ENDIAN_BIG or
+dnl ENDIAN_LITTLE.
+dnl The program returns from main instead of calling exit(): exit() was used
+dnl without a declaration, which compilers that reject implicit function
+dnl declarations refuse to build, and a build failure was indistinguishable
+dnl from the little-endian answer.
+AC_DEFUN([AC_C_ENDIAN],
+[AC_CACHE_CHECK(for endianness, ac_cv_c_endian,
+[
+  AC_RUN_IFELSE(
+    [AC_LANG_PROGRAM([], [dnl
+        long val = 1;
+        char *c = (char *) &val;
+        return *c == 1;
+    ])
+  ],[
+    ac_cv_c_endian=big
+  ],[
+    ac_cv_c_endian=little
+  ])
+])
+if test $ac_cv_c_endian = big; then
+  AC_DEFINE(ENDIAN_BIG, 1, [machine is bigendian])
+fi
+if test $ac_cv_c_endian = little; then
+  AC_DEFINE(ENDIAN_LITTLE, 1, [machine is littleendian])
+fi
+])
+AC_C_ENDIAN
+dnl Check whether the user wants threads or not
+dnl With --enable-threads, pthread_create must be found (otherwise configure
+dnl aborts); USE_THREADS is then defined and, when the compiler defines
+dnl __SUNPRO_C, -mt is prepended to CFLAGS.
+dnl The comparison uses the portable = operator: == is not understood by
+dnl every sh implementation of test and would make the option a silent no-op.
+AC_ARG_ENABLE(threads,
+  [AS_HELP_STRING([--enable-threads],[support multithreaded execution])])
+if test "x$enable_threads" = "xyes"; then
+  AC_SEARCH_LIBS(pthread_create, pthread)
+  if test "x$ac_cv_search_pthread_create" != "xno"; then
+    AC_DEFINE([USE_THREADS],,[Define this if you want to use pthreads])
+    dnl Sun compilers need the -mt flag!
+    AC_RUN_IFELSE(
+      [AC_LANG_PROGRAM([], [dnl
+#ifdef __SUNPRO_C
+   return 0;
+#else
+   return 1;
+#endif
+      ])
+    ],[
+      CFLAGS="-mt $CFLAGS"
+    ])
+  else
+    AC_MSG_ERROR([Can't enable threads without the POSIX thread library.])
+  fi
+fi
+dnl Probe for optional functions; each is checked independently.
+AC_CHECK_FUNCS(mlockall)
+AC_CHECK_FUNCS(getpagesizes)
+AC_CHECK_FUNCS(memcntl)
+dnl Register Makefile as an output file and generate the outputs.
+AC_CONFIG_FILES(Makefile)
+AC_OUTPUT

mosesdecoder/contrib/lmserver/depcomp ADDED Viewed

	@@ -0,0 +1,589 @@

+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+scriptversion=2007-03-29.01
+# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software
+# Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+# Handle the invocations that do not compile anything: an empty first
+# argument is an error, -h/--h* prints the usage text and -v/--v* prints
+# the script version.  Any other first argument falls through.
+case $1 in
+  '')
+     echo "$0: No command.  Try \`$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+Run PROGRAMS ARGS to compile a file, generating dependencies
+as side-effects.
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by `PROGRAMS ARGS'.
+  object      Object file output by `PROGRAMS ARGS'.
+  DEPDIR      directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputing dependencies.
+  libtool     Whether libtool is used (yes/no).
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit $?
+    ;;
+esac
+# The three environment variables below are mandatory; refuse to run
+# without them.
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+# Unless depfile is already set, it is derived from $object: the directory
+# ${DEPDIR-.deps} is inserted before the last path component, a P is
+# inserted after the last dot, and a resulting Pobj suffix is shortened
+# to Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+# Unless tmpdepfile is already set, it is $depfile with a T inserted after
+# the last dot.  Any stale temporary file is removed up front.
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+rm -f "$tmpdepfile"
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+if test "$depmode" = dashXmstdout; then
+   # This is just like dashmstdout with a different argument.
+   dashmflag=-xM
+   depmode=dashmstdout
+fi
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## Unfortunately, FreeBSD c89 acceptance of flags depends upon
+## the command line argument order; so add the flags where they
+## appear in depend2.am.  Note that the slowdown incurred here
+## affects only configure: in makefiles, %FASTDEP% shortcuts this.
+  # Rebuild the argument list one word at a time: each pass appends the
+  # current word to the end of "$@" (preceded by the dependency flags when
+  # the word is -c) and then shifts off the fnord placeholder and the
+  # original copy of the word.
+  for arg
+  do
+    case $arg in
+    -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
+    *)  set fnord "$@" "$arg" ;;
+    esac
+    shift # fnord
+    shift # $arg
+  done
+  "$@"
+  stat=$?
+  # On failure, discard the temporary dependency file and propagate the
+  # compiler's exit status.
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  mv "$tmpdepfile" "$depfile"
+  ;;
+gcc)
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+  # gccflag is already -M when the hp mode was remapped to gcc in the
+  # preamble; otherwise default to "-MD," so the option passed to the
+  # preprocessor becomes -Wp,-MD,$tmpdepfile.
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Start the dependency file with our own target, then append the
+  # compiler's output with its "target: " prefix replaced by a space.
+  echo "$object : \\" > "$depfile"
+  alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
+## The second -e expression handles DOS-style file names with drive letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the `deleted header file' problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+  tr ' ' '
+' < "$tmpdepfile" |
+## Some versions of gcc put a space before the `:'.  On the theory
+## that the space means something, we add a space to the output as
+## well.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+sgi)
+  # Ask the compiler to maintain the dependency file itself via -MDupdate;
+  # under libtool the option is handed through as -Wp,-MDupdate,FILE.
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depend on other files
+    echo "$object : \\" > "$depfile"
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like `#:fec' to the end of the
+    # dependency line.
+    # The redirection targets below are quoted, like every other use of
+    # $depfile in this script, so a path with whitespace stays one word.
+    tr ' ' '
+' < "$tmpdepfile" \
+    | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
+    tr '
+' ' ' >> "$depfile"
+    echo >> "$depfile"
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' '
+' < "$tmpdepfile" \
+   | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+   >> "$depfile"
+  else
+    # The sourcefile does not contain any dependencies, so just
+    # store a dummy comment line, to avoid errors with the Makefile
+    # "include basename.Plo" scheme.
+    echo "#dummy" > "$depfile"
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  In older versions, this file always lives in the
+  # current directory.  Also, the AIX compiler puts `$object:' at the
+  # start of each line; $object doesn't have directory information.
+  # Version 6 uses the directory in both cases.
+  # dir is the directory part of $object including the trailing slash
+  # (empty when $object has none); base is the file name without a
+  # trailing .o or .lo.
+  dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
+  test "x$dir" = "x$object" && dir=
+  base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
+  # tmpdepfile1..3 are the candidate locations of the .u file; without
+  # libtool all three name the same file.
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$base.u
+    tmpdepfile3=$dir.libs/$base.u
+    "$@" -Wc,-M
+  else
+    tmpdepfile1=$dir$base.u
+    tmpdepfile2=$dir$base.u
+    tmpdepfile3=$dir$base.u
+    "$@" -M
+  fi
+  stat=$?
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+    exit $stat
+  fi
+  # Use the first candidate that exists; if none does, tmpdepfile is left
+  # at the last candidate and the -f test below fails.
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form `foo.o: dependent.h'.
+    # Do two passes, one to just change these to
+    # `$object: dependent.h' and one to simply `dependent.h:'.
+    sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
+    # That's a tab and a space in the [].
+    sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+  else
+    # The sourcefile does not contain any dependencies, so just
+    # store a dummy comment line, to avoid errors with the Makefile
+    # "include basename.Plo" scheme.
+    echo "#dummy" > "$depfile"
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+icc)
+  # Intel's C compiler understands `-MD -MF file'.  However on
+  #    icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
+  # ICC 7.0 will fill foo.d with something like
+  #    foo.o: sub/foo.c
+  #    foo.o: sub/foo.h
+  # which is wrong.  We want:
+  #    sub/foo.o: sub/foo.c
+  #    sub/foo.o: sub/foo.h
+  #    sub/foo.c:
+  #    sub/foo.h:
+  # ICC 7.1 will output
+  #    foo.o: sub/foo.c sub/foo.h
+  # and will wrap long lines using \ :
+  #    foo.o: sub/foo.c ... \
+  #     sub/foo.h ... \
+  #     ...
+  # Compile with the dependency output directed to the temporary file.
+  "$@" -MD -MF "$tmpdepfile"
+  stat=$?
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+  # Each line is of the form `foo.o: dependent.h',
+  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+  # Do two passes, one to just change these to
+  # `$object: dependent.h' and one to simply `dependent.h:'.
+  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+  # Some versions of the HPUX 10.20 sed can't process this invocation
+  # correctly.  Breaking it into two sed invocations is a workaround.
+  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
+    sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+hp2)
+  # The "hp" stanza above does not work with aCC (C++) and HP's ia64
+  # compilers, which have integrated preprocessors.  The correct option
+  # to use with these is +Maked; it writes dependencies to a file named
+  # 'foo.d', which lands next to the object file, wherever that
+  # happens to be.
+  # Much of this is similar to the tru64 case; see comments there.
+  dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
+  test "x$dir" = "x$object" && dir=
+  base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
+  # Two candidate locations for the .d file; under libtool the second one
+  # is inside the .libs/ subdirectory.
+  if test "$libtool" = yes; then
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir.libs/$base.d
+    "$@" -Wc,+Maked
+  else
+    tmpdepfile1=$dir$base.d
+    tmpdepfile2=$dir$base.d
+    "$@" +Maked
+  fi
+  stat=$?
+  if test $stat -eq 0; then :
+  else
+     rm -f "$tmpdepfile1" "$tmpdepfile2"
+     exit $stat
+  fi
+  for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
+  do
+    test -f "$tmpdepfile" && break
+  done
+  if test -f "$tmpdepfile"; then
+    # Retarget the rule at $object.
+    sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
+    # Add `dependent.h:' lines.
+    sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile"
+  else
+    echo "#dummy" > "$depfile"
+  fi
+  rm -f "$tmpdepfile" "$tmpdepfile2"
+  ;;
+tru64)
+   # The Tru64 compiler uses -MD to generate dependencies as a side
+   # effect.  `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
+   # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+   # dependencies in `foo.d' instead, so we check for that too.
+   # Subdirectories are respected.
+   dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
+   test "x$dir" = "x$object" && dir=
+   base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
+   if test "$libtool" = yes; then
+      # With Tru64 cc, shared objects can also be used to make a
+      # static library.  This mechanism is used in libtool 1.4 series to
+      # handle both shared and static libraries in a single compilation.
+      # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
+      #
+      # With libtool 1.5 this exception was removed, and libtool now
+      # generates 2 separate objects for the 2 libraries.  These two
+      # compilations output dependencies in $dir.libs/$base.o.d and
+      # in $dir$base.o.d.  We have to check for both files, because
+      # one of the two compilations can be disabled.  We should prefer
+      # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
+      # automatically cleaned when .libs/ is deleted, while ignoring
+      # the former would cause a distcleancheck panic.
+      tmpdepfile1=$dir.libs/$base.lo.d   # libtool 1.4
+      tmpdepfile2=$dir$base.o.d          # libtool 1.5
+      tmpdepfile3=$dir.libs/$base.o.d    # libtool 1.5
+      tmpdepfile4=$dir.libs/$base.d      # Compaq CCC V6.2-504
+      "$@" -Wc,-MD
+   else
+      tmpdepfile1=$dir$base.o.d
+      tmpdepfile2=$dir$base.d
+      tmpdepfile3=$dir$base.d
+      tmpdepfile4=$dir$base.d
+      "$@" -MD
+   fi
+   stat=$?
+   if test $stat -eq 0; then :
+   else
+      rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
+      exit $stat
+   fi
+   # Use the first candidate file that exists.
+   for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
+   do
+     test -f "$tmpdepfile" && break
+   done
+   if test -f "$tmpdepfile"; then
+      # First pass retargets the rule at $object; the second pass emits a
+      # `dependent.h:' line for each input line.
+      sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
+      # That's a tab and a space in the [].
+      sed -e 's,^.*\.[a-z]*:[	 ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+   else
+      echo "#dummy" > "$depfile"
+   fi
+   rm -f "$tmpdepfile"
+   ;;
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o.
+  # Run the real compilation first; give up immediately if it fails.
+  "$@" || exit $?
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test $1 != '--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # Remove `-o $object'.
+  # Words equal to -o or matching $object are shifted away; every other
+  # word is appended to the end of "$@" before its original copy (and the
+  # fnord placeholder) are shifted off.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+  # dashmflag is preset to -xM when the dashXmstdout mode was remapped in
+  # the preamble; otherwise use -M.
+  test -z "$dashmflag" && dashmflag=-M
+  # Require at least two characters before searching for `:'
+  # in the target name.  This is to cope with DOS-style filenames:
+  # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
+  "$@" $dashmflag |
+    sed 's:^[  ]*[^: ][^:][^:]*\:[    ]*:'"$object"'\: :' > "$tmpdepfile"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+makedepend)
+  # Compile normally first, then run makedepend on a filtered copy of the
+  # command line to obtain the dependencies.
+  "$@" || exit $?
+  # Remove any Libtool call
+  if test "$libtool" = yes; then
+    while test $1 != '--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # X makedepend
+  shift
+  # The first loop iteration empties "$@" (the loop's word list was already
+  # expanded); the arguments worth keeping are then appended one by one.
+  cleared=no
+  for arg in "$@"; do
+    case $cleared in
+    no)
+      set ""; shift
+      cleared=yes ;;
+    esac
+    case "$arg" in
+    -D*|-I*)
+      set fnord "$@" "$arg"; shift ;;
+    # Strip any option that makedepend may not understand.  Remove
+    # the object too, otherwise makedepend will parse it as a source file.
+    -*|$object)
+      ;;
+    *)
+      set fnord "$@" "$arg"; shift ;;
+    esac
+  done
+  # obj_suffix is $object from its last dot onwards (for example ".o").
+  obj_suffix="`echo $object | sed 's/^.*\././'`"
+  touch "$tmpdepfile"
+  ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Skip the first two lines of makedepend's output when generating the
+  # dummy `header :' entries.
+  sed '1,2d' "$tmpdepfile" | tr ' ' '
+' | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout.
+  # Run the real compilation first; give up immediately if it fails.
+  "$@" || exit $?
+  # Remove the call to Libtool.
+  if test "$libtool" = yes; then
+    while test $1 != '--mode=compile'; do
+      shift
+    done
+    shift
+  fi
+  # Remove `-o $object'.
+  IFS=" "
+  for arg
+  do
+    case $arg in
+    -o)
+      shift
+      ;;
+    $object)
+      shift
+      ;;
+    *)
+      set fnord "$@" "$arg"
+      shift # fnord
+      shift # $arg
+      ;;
+    esac
+  done
+  # Preprocess with -E and turn every `# N "file"' / `#line N "file"' marker
+  # into a ` file \' continuation line; the trailing ` \' is removed from
+  # the last line.
+  "$@" -E |
+    sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
+       -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
+    sed '$ s: \\$::' > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  # Append a dummy `file :' rule for each dependency.
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o,
+  # because we must use -o when running libtool.
+  "$@" || exit $?
+  # Rebuild the argument list, dropping the -Gm/-Gi/-ZI options (in both
+  # the - and / spellings) and keeping everything else.
+  IFS=" "
+  for arg
+  do
+    case "$arg" in
+    "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+	set fnord "$@"
+	shift
+	shift
+	;;
+    *)
+	set fnord "$@" "$arg"
+	shift
+	shift
+	;;
+    esac
+  done
+  # The temporary file is itself a small shell script: one
+  # `echo "`cygpath -u "file"`"' line per distinct #line marker.  It is
+  # sourced twice below, once for the dependency list and once for the
+  # dummy `file:' rules.
+  "$@" -E |
+  sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::	\1 \\:p' >> "$depfile"
+  echo "	" >> "$depfile"
+  . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+none)
+  # No dependency tracking: replace this script with the compiler command.
+  exec "$@"
+  ;;
+*)
+  # Any other depmode value is rejected.
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+exit 0
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:

mosesdecoder/contrib/lmserver/lmserver.h ADDED Viewed

	@@ -0,0 +1,375 @@

+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* $Id$ */
+#ifndef lmserver_lmserver_h
+#define lmserver_lmserver_h
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <event.h>
+#include <netdb.h>
+/* Fixed sizes for I/O buffers and UDP framing.  The units are presumably
+ * bytes; the code that uses them is outside this header (TODO confirm). */
+#define DATA_BUFFER_SIZE 2048
+#define UDP_READ_BUFFER_SIZE 65536
+#define UDP_MAX_PAYLOAD_SIZE 1400
+#define UDP_HEADER_SIZE 8
+#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
+/* I'm told the max length of a 64-bit num converted to string is 20 bytes.
+ * Plus a few for spaces, \r\n, \0 */
+#define SUFFIX_SIZE 24
+/** Initial size of list of items being returned by "get". */
+#define ITEM_LIST_INITIAL 200
+/** Initial size of list of CAS suffixes appended to "gets" lines. */
+#define SUFFIX_LIST_INITIAL 20
+/** Initial size of the sendmsg() scatter/gather array. */
+#define IOV_LIST_INITIAL 400
+/** Initial number of sendmsg() argument structures to allocate. */
+#define MSG_LIST_INITIAL 10
+/** High water marks for buffer shrinking */
+#define READ_BUFFER_HIGHWAT 8192
+#define ITEM_LIST_HIGHWAT 400
+#define IOV_LIST_HIGHWAT 600
+#define MSG_LIST_HIGHWAT 100
+/* Get a consistent bool type */
+#if HAVE_STDBOOL_H
+# include <stdbool.h>
+#else
+  typedef enum {false = 0, true = 1} bool;
+#endif
+/* Fixed-width integer types.  NOTE(review): the fallback branch only supplies
+ * uint8_t, while this header also uses uint64_t and int64_t; presumably those
+ * come from one of the system headers included above when <stdint.h> is
+ * missing -- confirm. */
+#if HAVE_STDINT_H
+# include <stdint.h>
+#else
+ typedef unsigned char             uint8_t;
+#endif
+/* unistd.h is here */
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+/** Time relative to server start. Smaller than time_t on 64-bit systems. */
+typedef unsigned int rel_time_t;
+/**
+ * Server-wide counters.  A single instance named `stats` is declared extern
+ * further down in this header.  Field meanings beyond their names are not
+ * visible here; presumably they mirror the memcached statistics this code
+ * derives from (confirm against the code that updates them).
+ */
+struct stats {
+    unsigned int  curr_items;
+    unsigned int  total_items;
+    uint64_t      curr_bytes;
+    unsigned int  curr_conns;
+    unsigned int  total_conns;
+    unsigned int  conn_structs;
+    uint64_t      get_cmds;
+    uint64_t      set_cmds;
+    uint64_t      get_hits;
+    uint64_t      get_misses;
+    uint64_t      evictions;
+    time_t        started;          /* when the process was started */
+    uint64_t      bytes_read;
+    uint64_t      bytes_written;
+};
+/* Presumably the upper bound for settings.verbose -- confirm at the point
+ * where the option is parsed. */
+#define MAX_VERBOSITY_LEVEL 2
+/**
+ * Runtime configuration of the server.  A single instance named `settings`
+ * is declared extern right after the structure.
+ */
+struct settings {
+    size_t maxbytes;
+    int maxconns;
+    int port;
+    int udpport;
+    char *srilm;            /* presumably the SRILM model path -- confirm */
+    int srilm_order;
+    char *inter;
+    int verbose;
+    rel_time_t oldest_live; /* ignore existing items older than this */
+    bool managed;          /* if 1, a tracker manages virtual buckets */
+    int evict_to_free;
+    char *socketpath;   /* path to unix socket if using local socket */
+    int access;  /* access mask (a la chmod) for unix domain socket */
+    double factor;          /* chunk size growth factor */
+    int chunk_size;
+    int num_threads;        /* number of libevent threads to run */
+    char prefix_delimiter;  /* character that marks a key prefix (for stats) */
+    int detail_enabled;     /* nonzero if we're collecting detailed stats */
+};
+/* The single global instances of the two structures; defined elsewhere. */
+extern struct stats stats;
+extern struct settings settings;
+/* Bit values for the it_flags member of an item. */
+#define ITEM_LINKED 1
+#define ITEM_DELETED 2
+/* temp */
+#define ITEM_SLABBED 4
+/**
+ * Header of a stored item.  The variable-length payload (key, suffix string
+ * and data, as listed at the end of the structure) follows the header in
+ * memory, starting at end[].
+ */
+typedef struct _stritem {
+    struct _stritem *next;
+    struct _stritem *prev;
+    struct _stritem *h_next;    /* hash chain next */
+    rel_time_t      time;       /* least recent access */
+    rel_time_t      exptime;    /* expire time */
+    int             nbytes;     /* size of data */
+    unsigned short  refcount;
+    uint8_t         nsuffix;    /* length of flags-and-length string */
+    uint8_t         it_flags;   /* ITEM_* above */
+    uint8_t         slabs_clsid;/* which slab class we're in */
+    uint8_t         nkey;       /* key length, w/terminating null and padding */
+    uint64_t        cas_id;     /* the CAS identifier */
+    void * end[];
+    /* then null-terminated key */
+    /* then " flags length\r\n" (no terminating null) */
+    /* then data with terminating \r\n (no terminating null; it's binary!) */
+} item;
+/* Accessors for the payload behind the header: the key starts at end[], the
+ * suffix nkey + 1 bytes later, and the data nsuffix bytes after the suffix.
+ * ITEM_ntotal is the header size plus nkey + 1 + nsuffix + nbytes. */
+#define ITEM_key(item) ((char*)&((item)->end[0]))
+/* warning: don't use these macros with a function, as it evals its arg twice */
+#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)
+#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)
+#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)
+/** States of a connection; presumably the values stored in conn.state. */
+enum conn_states {
+    conn_listening,  /** the socket which listens for connections */
+    conn_read,       /** reading in a command line */
+    conn_write,      /** writing out a simple response */
+    conn_nread,      /** reading in a fixed number of bytes */
+    conn_swallow,    /** swallowing unnecessary bytes w/o storing */
+    conn_closing,    /** closing this connection */
+    conn_mwrite,     /** writing out many items sequentially */
+};
+/* Storage command codes; presumably the values held in conn.item_comm while
+ * an item's data is being read (confirm against the command parser). */
+#define NREAD_ADD 1
+#define NREAD_SET 2
+#define NREAD_REPLACE 3
+#define NREAD_APPEND 4
+#define NREAD_PREPEND 5
+#define NREAD_CAS 6
+typedef struct conn conn;
+/**
+ * Per-connection state: the socket descriptor and its libevent event, the
+ * read and write buffers, and the bookkeeping used by the nread, swallow and
+ * mwrite states, plus the extra fields needed for UDP "connections".
+ */
+struct conn {
+    int    sfd;
+    int    state;   /* presumably one of enum conn_states -- confirm */
+    struct event event;
+    short  ev_flags;
+    short  which;   /** which events were just triggered */
+    char   *rbuf;   /** buffer to read commands into */
+    char   *rcurr;  /** but if we parsed some already, this is where we stopped */
+    int    rsize;   /** total allocated size of rbuf */
+    int    rbytes;  /** how much data, starting from rcurr, do we have unparsed */
+    char   *wbuf;
+    char   *wcurr;
+    int    wsize;
+    int    wbytes;
+    int    write_and_go; /** which state to go into after finishing current write */
+    void   *write_and_free; /** free this memory after finishing writing */
+    char   *ritem;  /** when we read in an item's value, it goes here */
+    int    rlbytes;
+    /* data for the nread state */
+    /**
+     * item is used to hold an item structure created after reading the command
+     * line of set/add/replace commands, but before we finished reading the actual
+     * data. The data is read into ITEM_data(item) to avoid extra copying.
+     */
+    void   *item;     /* for commands set/add/replace  */
+    int    item_comm; /* which one is it: set/add/replace */
+    /* data for the swallow state */
+    int    sbytes;    /* how many bytes to swallow */
+    /* data for the mwrite state */
+    struct iovec *iov;
+    int    iovsize;   /* number of elements allocated in iov[] */
+    int    iovused;   /* number of elements used in iov[] */
+    struct msghdr *msglist;
+    int    msgsize;   /* number of elements allocated in msglist[] */
+    int    msgused;   /* number of elements used in msglist[] */
+    int    msgcurr;   /* element in msglist[] being transmitted now */
+    int    msgbytes;  /* number of bytes in current msg */
+    item   **ilist;   /* list of items to write out */
+    int    isize;
+    item   **icurr;
+    int    ileft;
+    char   **suffixlist;
+    int    suffixsize;
+    char   **suffixcurr;
+    int    suffixleft;
+    /* data for UDP clients */
+    bool   udp;       /* is this a UDP "connection" */
+    int    request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
+    struct sockaddr request_addr; /* Who sent the most recent request */
+    socklen_t request_addr_size;
+    unsigned char *hdrbuf; /* udp packet headers */
+    int    hdrsize;   /* number of headers' worth of space is allocated */
+    int    binary;    /* are we in binary mode */
+    int    bucket;    /* bucket number for the next command, if running as
+                         a managed instance. -1 (_not_ 0) means invalid. */
+    int    gen;       /* generation requested for the bucket */
+    bool   noreply;   /* True if the reply should not be sent. */
+    conn   *next;     /* Used for generating a list of conn structures */
+};
+/* number of virtual buckets for a managed instance */
+#define MAX_BUCKETS 32768
+/* current time of day (updated periodically) */
+/* Declared volatile and extern: the definition and the code that updates it
+ * live in another translation unit. */
+extern volatile rel_time_t current_time;
+/*
+ * Functions
+ *
+ * Connection helpers implemented outside this header.  Judging by their
+ * names, the first two take a conn from, and return a conn to, a free list
+ * (implementation not visible here -- confirm).  conn_new receives the socket
+ * descriptor, the initial state, the libevent flags, the read buffer size,
+ * a UDP flag and the event base, and returns a conn pointer.
+ *
+ * do_conn_from_freelist is declared with an explicit (void) parameter list so
+ * that it is a real prototype in C and matches mt_conn_from_freelist(void).
+ */
+conn *do_conn_from_freelist(void);
+bool do_conn_add_to_freelist(conn *c);
+conn *conn_new(const int sfd, const int init_state, const int event_flags, const int read_buffer_size, const bool is_udp, struct event_base *base);
+#include "stats.h"
+//#include "slabs.h"
+//#include "assoc.h"
+//#include "items.h"
+//#include "memcached_dtrace.h"
+/*
+ * In multithreaded mode, we wrap certain functions with lock management and
+ * replace the logic of some other functions. All wrapped functions have
+ * "mt_" and "do_" variants. In multithreaded mode, the plain version of a
+ * function is #define-d to the "mt_" variant, which often just grabs a
+ * lock and calls the "do_" function. In singlethreaded mode, the "do_"
+ * function is called directly.
+ *
+ * Functions such as the libevent-related calls that need to do cross-thread
+ * communication in multithreaded mode (rather than actually doing the work
+ * in the current thread) are called via "dispatch_" frontends, which are
+ * also #define-d to directly call the underlying code in singlethreaded mode.
+ */
+#ifdef USE_THREADS
+void thread_init(int nthreads, struct event_base *main_base);
+int  dispatch_event_add(int thread, conn *c);
+void dispatch_conn_new(int sfd, int init_state, int event_flags, int read_buffer_size, int is_udp);
+/* Lock wrappers for cache functions that are called from main loop. */
+char *mt_add_delta(conn *c, item *item, const int incr, const int64_t delta,
+                   char *buf);
+void mt_assoc_move_next_bucket(void);
+conn *mt_conn_from_freelist(void);
+bool  mt_conn_add_to_freelist(conn *c);
+char *mt_suffix_from_freelist(void);
+bool  mt_suffix_add_to_freelist(char *s);
+char *mt_defer_delete(item *it, time_t exptime);
+int   mt_is_listen_thread(void);
+item *mt_item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
+char *mt_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes);
+void  mt_item_flush_expired(void);
+item *mt_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked);
+int   mt_item_link(item *it);
+void  mt_item_remove(item *it);
+int   mt_item_replace(item *it, item *new_it);
+char *mt_item_stats(int *bytes);
+char *mt_item_stats_sizes(int *bytes);
+void  mt_item_unlink(item *it);
+void  mt_item_update(item *it);
+void  mt_run_deferred_deletes(void);
+void *mt_slabs_alloc(size_t size, unsigned int id);
+void  mt_slabs_free(void *ptr, size_t size, unsigned int id);
+int   mt_slabs_reassign(unsigned char srcid, unsigned char dstid);
+char *mt_slabs_stats(int *buflen);
+void  mt_stats_lock(void);
+void  mt_stats_unlock(void);
+int   mt_store_item(item *item, int comm);
+# define add_delta(c,x,y,z,a)        mt_add_delta(c,x,y,z,a)
+# define assoc_move_next_bucket()    mt_assoc_move_next_bucket()
+# define conn_from_freelist()        mt_conn_from_freelist()
+# define conn_add_to_freelist(x)     mt_conn_add_to_freelist(x)
+# define suffix_from_freelist()      mt_suffix_from_freelist()
+# define suffix_add_to_freelist(x)   mt_suffix_add_to_freelist(x)
+# define defer_delete(x,y)           mt_defer_delete(x,y)
+# define is_listen_thread()          mt_is_listen_thread()
+# define item_alloc(x,y,z,a,b)       mt_item_alloc(x,y,z,a,b)
+# define item_cachedump(x,y,z)       mt_item_cachedump(x,y,z)
+# define item_flush_expired()        mt_item_flush_expired()
+# define item_get_notedeleted(x,y,z) mt_item_get_notedeleted(x,y,z)
+# define item_link(x)                mt_item_link(x)
+# define item_remove(x)              mt_item_remove(x)
+# define item_replace(x,y)           mt_item_replace(x,y)
+# define item_stats(x)               mt_item_stats(x)
+# define item_stats_sizes(x)         mt_item_stats_sizes(x)
+# define item_update(x)              mt_item_update(x)
+# define item_unlink(x)              mt_item_unlink(x)
+# define run_deferred_deletes()      mt_run_deferred_deletes()
+# define slabs_alloc(x,y)            mt_slabs_alloc(x,y)
+# define slabs_free(x,y,z)           mt_slabs_free(x,y,z)
+# define slabs_reassign(x,y)         mt_slabs_reassign(x,y)
+# define slabs_stats(x)              mt_slabs_stats(x)
+# define store_item(x,y)             mt_store_item(x,y)
+# define STATS_LOCK()                mt_stats_lock()
+# define STATS_UNLOCK()              mt_stats_unlock()
+#else /* !USE_THREADS */
+# define add_delta(c,x,y,z,a)          do_add_delta(c,x,y,z,a)
+# define assoc_move_next_bucket()    do_assoc_move_next_bucket()
+# define conn_from_freelist()        do_conn_from_freelist()
+# define conn_add_to_freelist(x)     do_conn_add_to_freelist(x)
+# define suffix_from_freelist()      do_suffix_from_freelist()
+# define suffix_add_to_freelist(x)   do_suffix_add_to_freelist(x)
+# define defer_delete(x,y)           do_defer_delete(x,y)
+# define dispatch_conn_new(x,y,z,a,b) conn_new(x,y,z,a,b,main_base)
+# define dispatch_event_add(t,c)     event_add(&(c)->event, 0)
+# define is_listen_thread()          1
+# define item_alloc(x,y,z,a,b)       do_item_alloc(x,y,z,a,b)
+# define item_cachedump(x,y,z)       do_item_cachedump(x,y,z)
+# define item_flush_expired()        do_item_flush_expired()
+# define item_get_notedeleted(x,y,z) do_item_get_notedeleted(x,y,z)
+# define item_link(x)                do_item_link(x)
+# define item_remove(x)              do_item_remove(x)
+# define item_replace(x,y)           do_item_replace(x,y)
+# define item_stats(x)               do_item_stats(x)
+# define item_stats_sizes(x)         do_item_stats_sizes(x)
+# define item_unlink(x)              do_item_unlink(x)
+# define item_update(x)              do_item_update(x)
+# define run_deferred_deletes()      do_run_deferred_deletes()
+# define slabs_alloc(x,y)            do_slabs_alloc(x,y)
+# define slabs_free(x,y,z)           do_slabs_free(x,y,z)
+# define slabs_reassign(x,y)         do_slabs_reassign(x,y)
+# define slabs_stats(x)              do_slabs_stats(x)
+# define store_item(x,y)             do_store_item(x,y)
+# define thread_init(x,y)            0
+# define STATS_LOCK()                /**/
+# define STATS_UNLOCK()              /**/
+#endif /* !USE_THREADS */
+/*
+ * Branch-prediction hints.
+ *
+ * likely(x) and unlikely(x) pass x through __builtin_expect() with an
+ * expected value of 1 and 0 respectively.  When __GNUC__ is not defined,
+ * or the compiler reports itself as GCC 2.x older than 2.96,
+ * __builtin_expect is defined here as a pass-through macro, so both hints
+ * reduce to plain (x).
+ */
+#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
+#define __builtin_expect(x, expected_value) (x)
+#endif
+#define likely(x)       __builtin_expect((x),1)
+#define unlikely(x)     __builtin_expect((x),0)
+#endif

mosesdecoder/contrib/lmserver/stamp-h1 ADDED Viewed

	@@ -0,0 +1 @@


1	+ timestamp for config.h

mosesdecoder/contrib/mert-moses-multi.pl ADDED Viewed

	@@ -0,0 +1,1529 @@

+#!/usr/bin/perl -w
+# $Id$
+# Usage:
+# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
+# For other options see below or run 'mert-moses.pl --help'
+#
+# NB: This is a variant of mert-moses.pl for use with the interpolated scorer
+# (MergeScorer) described in the following paper:
+#
+# "Optimising Multiple Metrics with MERT" by Christophe Servan and Holger Schwenk,
+#   Prague Bulletin of Mathematical Linguistics 96 (2011) p109-117
+#   http://www-lium.univ-lemans.fr/~servan/publications/Servan_PBML_2011.pdf
+#
+# If you are not using MergeScorer, then you should use the mert-moses.pl script instead
+#
+# Notes:
+# <foreign> and <english> should be raw text files, one sentence per line
+# <english> can be a prefix, in which case the files <english>0, <english>1, etc. are used
+# Excerpts from revision history
+# Sept 2011   multi-threaded mert (Barry Haddow)
+# 3 Aug 2011  Added random directions, historic best, pairwise ranked (PK)
+# Jul 2011    simplifications (Ondrej Bojar)
+#             -- rely on moses' -show-weights instead of parsing moses.ini
+#                ... so moses is also run once *before* mert starts, checking
+#                    the model to some extent
+#             -- got rid of the 'triples' mess;
+#                use --range to supply bounds for random starting values:
+#                --range tm:-3..3 --range lm:-3..3
+# 5 Aug 2009  Handling of different reference length policies (shortest, average, closest) for BLEU
+#             and case-sensitive/insensitive evaluation (Nicola Bertoldi)
+# 5 Jun 2008  Forked previous version to support new mert implementation.
+# 13 Feb 2007 Better handling of default values for lambda, now works with multiple
+#             models and lexicalized reordering
+# 11 Oct 2006 Handle different input types through parameter --inputtype=[0|1]
+#             (0 for text, 1 for confusion network, default is 0) (Nicola Bertoldi)
+# 10 Oct 2006 Allow skip of filtering of phrase tables (--no-filter-phrase-table)
+#             useful if binary phrase tables are used (Nicola Bertoldi)
+# 28 Aug 2006 Use either closest or average or shortest (default) reference
+#             length as effective reference length
+#             Use either normalization or not (default) of texts (Nicola Bertoldi)
+# 31 Jul 2006 move gzip run*.out to avoid failure with restarts
+#             adding default paths
+# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
+# 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
+# 27 Jul 2006 adding the safesystem() function to handle process failure
+# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
+# 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
+# 18 Jul 2006 adapted for Moses and cleaned up (PK)
+# 21 Jan 2005 unified various versions, thorough cleanup (DWC)
+#             now indexing accumulated n-best list solely by feature vectors
+# 14 Dec 2004 reimplemented find_threshold_points in C (NMD)
+# 25 Oct 2004 Use either average or shortest (default) reference
+#             length as effective reference length (DWC)
+# 13 Oct 2004 Use alternative decoders (DWC)
+# Original version by Philipp Koehn
+use FindBin qw($RealBin);
+use File::Basename;
+use File::Path;
+my $SCRIPTS_ROOTDIR = $RealBin;
+$SCRIPTS_ROOTDIR =~ s/\/training$//;
+$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
+## We preserve this bit of comments to keep the traditional weight ranges.
+#     "w" => [ [ 0.0, -1.0, 1.0 ] ],  # word penalty
+#     "d"  => [ [ 1.0, 0.0, 2.0 ] ],  # lexicalized reordering model
+#     "lm" => [ [ 1.0, 0.0, 2.0 ] ],  # language model
+#     "g"  => [ [ 1.0, 0.0, 2.0 ],    # generation model
+# 	      [ 1.0, 0.0, 2.0 ] ],
+#     "tm" => [ [ 0.3, 0.0, 0.5 ],    # translation model
+# 	      [ 0.2, 0.0, 0.5 ],
+# 	      [ 0.3, 0.0, 0.5 ],
+# 	      [ 0.2, 0.0, 0.5 ],
+# 	      [ 0.0,-1.0, 1.0 ] ],  # ... last weight is phrase penalty
+#     "lex"=> [ [ 0.1, 0.0, 0.2 ] ],  # global lexical model
+#     "I"  => [ [ 0.0,-1.0, 1.0 ] ],  # input lattice scores
+# Lambda (weight) names come in two flavours: moses.ini spells them out in
+# FULL form, whereas this script -- both internally and on its command
+# line -- refers to them by their ABBR form.  Each entry is "ABBR=FULL".
+my @ABBR_FULL_MAP = (
+  "d=weight-d",
+  "lm=weight-l",
+  "tm=weight-t",
+  "w=weight-w",
+  "g=weight-generation",
+  "lex=weight-lex",
+  "I=weight-i",
+);
+# Forward (ABBR -> FULL) and inverse (FULL -> ABBR) lookup tables.
+my (%ABBR2FULL, %FULL2ABBR);
+foreach my $pair (@ABBR_FULL_MAP) {
+  my ($abbr, $full) = split(/=/, $pair, 2);
+  $ABBR2FULL{$abbr} = $full;
+  $FULL2ABBR{$full} = $abbr;
+}
+# Default settings.  Most of the variables below are bound to command-line
+# switches by the GetOptions() call further down in this script.
+my $minimum_required_change_in_weights = 0.00001;
+    # stop if no lambda changes more than this
+my $verbose = 0; # set by --verbose
+my $usage = 0; # request for --help
+my $___WORKING_DIR = "mert-work"; # --working-dir: where all the files are created
+my $___DEV_F = undef; # required, input text to decode
+my $___DEV_E = undef; # required, basename of files with references
+my $___DECODER = undef; # required, pathname to the decoder executable
+my $___CONFIG = undef; # required, pathname to startup ini file
+my $___N_BEST_LIST_SIZE = 100; # --nbest: size of the n-best list to generate
+my $queue_flags = "-hard";  # extra parameters for parallelizer
+      # the -l ws0ssmt was relevant only to JHU 2006 workshop
+my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial)
+my $___DECODER_FLAGS = ""; # additional parameters to pass to the decoder
+my $continue = 0; # should we try to continue from the last saved step?
+my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
+my $___FILTER_PHRASE_TABLE = 1; # filter phrase table
+my $___PREDICTABLE_SEEDS = 0; # --predictable-seeds: make random restarts deterministic
+my $___START_WITH_HISTORIC_BESTS = 0; # use best settings from all previous iterations as starting points [Foster&Kuhn,2009]
+my $___RANDOM_DIRECTIONS = 0; # search in random directions only
+my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008]
+my $___PAIRWISE_RANKED_OPTIMIZER = 0; # use Hopkins&May[2011]
+my $___PRO_STARTING_POINT = 0; # get a starting point from pairwise ranked optimizer
+my $___RANDOM_RESTARTS = 20; # --random-restarts: number of random restarts
+my $___HISTORIC_INTERPOLATION = 0; # interpolate optimized weights with previous iteration's weights [Hopkins&May,2011,5.4.3]
+my $__THREADS = 0; # --threads: use multi-threaded mert (must be compiled in)
+# Parameter for effective reference length when computing BLEU score
+# Default is to use shortest reference
+# Use "--shortest" to use shortest reference length
+# Use "--average" to use average reference length
+# Use "--closest" to use closest reference length
+# Only one of --shortest, --average and --closest may be set;
+# supplying more than one of them is a fatal error
+my $___SHORTEST = 0;
+my $___AVERAGE = 0;
+my $___CLOSEST = 0;
+# Use "--nocase" to compute case-insensitive scores
+my $___NOCASE = 0;
+# Use "--nonorm" to not normalize translations before computing scores
+my $___NONORM = 0;
+# set 0 if input type is text, set 1 if input type is confusion network
+my $___INPUTTYPE = 0;
+my $mertdir = undef; # path to new mert directory
+my $mertargs = undef; # args to pass through to mert & extractor
+my $mertmertargs = undef; # args to pass through to mert only
+my $filtercmd = undef; # path to filter-model-given-input.pl
+my $filterfile = undef;
+my $qsubwrapper = undef;
+my $moses_parallel_cmd = undef;
+my $scorer_config = "BLEU:1";
+my $old_sge = 0; # assume sge<6.0
+my $___CONFIG_ORIG = undef; # pathname to startup ini file before filtering
+my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
+                                  # if undef work on all features
+                                  # (others are fixed to the starting values)
+my $___RANGES = undef;
+my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loading data (default =-1)
+                                  # -1 means all previous, i.e. from iteration 1
+                                  # 0 means no previous data, i.e. from actual iteration
+                                  # 1 means 1 previous data , i.e. from the actual iteration and from the previous one
+                                  # and so on
+my $maximum_iterations = 25;
+use strict;
+use Getopt::Long;
+GetOptions(
+  "working-dir=s" => \$___WORKING_DIR,
+  "input=s" => \$___DEV_F,
+  "inputtype=i" => \$___INPUTTYPE,
+  "refs=s" => \$___DEV_E,
+  "decoder=s" => \$___DECODER,
+  "config=s" => \$___CONFIG,
+  "nbest=i" => \$___N_BEST_LIST_SIZE,
+  "queue-flags=s" => \$queue_flags,
+  "jobs=i" => \$___JOBS,
+  "decoder-flags=s" => \$___DECODER_FLAGS,
+  "continue" => \$continue,
+  "skip-decoder" => \$skip_decoder,
+  "shortest" => \$___SHORTEST,
+  "average" => \$___AVERAGE,
+  "closest" => \$___CLOSEST,
+  "nocase" => \$___NOCASE,
+  "nonorm" => \$___NONORM,
+  "help" => \$usage,
+  "verbose" => \$verbose,
+  "mertdir=s" => \$mertdir,
+  "mertargs=s" => \$mertargs,
+  "mertmertargs=s" => \$mertmertargs,
+  "rootdir=s" => \$SCRIPTS_ROOTDIR,
+  "filtercmd=s" => \$filtercmd, # allow to override the default location
+  "filterfile=s" => \$filterfile, # input to filtering script (useful for lattices/confnets)
+  "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
+  "mosesparallelcmd=s" => \$moses_parallel_cmd, # allow to override the default location
+  "old-sge" => \$old_sge, #passed to moses-parallel
+  "filter-phrase-table!" => \$___FILTER_PHRASE_TABLE, # (dis)allow of phrase tables
+  "predictable-seeds" => \$___PREDICTABLE_SEEDS, # make random restarts deterministic
+  "historic-bests" => \$___START_WITH_HISTORIC_BESTS, # use best settings from all previous iterations as starting points
+  "random-directions" => \$___RANDOM_DIRECTIONS, # search only in random directions
+  "number-of-random-directions=i" => \$___NUM_RANDOM_DIRECTIONS, # number of random directions
+  "random-restarts=i" => \$___RANDOM_RESTARTS, # number of random restarts
+  "activate-features=s" => \$___ACTIVATE_FEATURES, #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
+  "range=s@" => \$___RANGES,
+  "prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
+  "maximum-iterations=i" => \$maximum_iterations,
+  "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
+  "pro-starting-point" => \$___PRO_STARTING_POINT,
+  "historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
+  "threads=i" => \$__THREADS,
+  "sc-config=s" => \$scorer_config
+) or exit(1);
+# The four mandatory arguments may be given either through their --options
+# or positionally; the positional form is recognised only when exactly four
+# arguments are left in @ARGV at this point.
+# Positional order: input_file references_basename decoder_executable decoder_config
+($___DEV_F, $___DEV_E, $___DECODER, $___CONFIG) = splice(@ARGV, 0, 4)
+  if @ARGV == 4;
+# Print the usage message and exit with status 1 when --help was given or
+# when any of the four required parameters (input text, references, decoder
+# executable, decoder config) is still undefined.
+# Only switches accepted by the GetOptions() call are listed here: the
+# former --scorenbestcmd entry was dropped because that option is not
+# recognised and would abort option parsing.
+if ($usage || !defined $___DEV_F || !defined $___DEV_E || !defined $___DECODER || !defined $___CONFIG) {
+  print STDERR "usage: $0 input-text references decoder-executable decoder.ini
+Options:
+  --working-dir=mert-dir ... where all the files are created
+  --nbest=100            ... how big nbestlist to generate
+  --jobs=N               ... set this to anything to run moses in parallel
+  --mosesparallelcmd=STR ... use a different script instead of moses-parallel
+  --queue-flags=STRING   ... anything you wish to pass to qsub, eg.
+                             '-l ws06osssmt=true'. The default is: '-hard'
+                             To reset the parameters, please use
+                             --queue-flags=' '
+                             (i.e. a space between the quotes).
+  --decoder-flags=STRING ... extra parameters for the decoder
+  --continue             ... continue from the last successful iteration
+  --skip-decoder         ... skip the decoder run for the first time,
+                             assuming that we got interrupted during
+                             optimization
+  --shortest --average --closest
+                         ... Use shortest/average/closest reference length
+                             as effective reference length (mutually exclusive)
+  --nocase               ... Do not preserve case information; i.e.
+                             case-insensitive evaluation (default is false).
+  --nonorm               ... Do not use text normalization (flag is not active,
+                             i.e. text is NOT normalized)
+  --filtercmd=STRING     ... path to filter-model-given-input.pl
+  --filterfile=STRING    ... path to alternative to input-text for filtering
+                             model. useful for lattice decoding
+  --rootdir=STRING       ... where do helpers reside (if not given explicitly)
+  --mertdir=STRING       ... path to new mert implementation
+  --mertargs=STRING      ... extra args for mert, eg. to specify scorer
+  --mertmertargs=STRING  ... extra args for mert only,
+  --old-sge              ... passed to parallelizers, assume Grid Engine < 6.0
+  --inputtype=[0|1|2]    ... Handle different input types: (0 for text,
+                             1 for confusion network, 2 for lattices,
+                             default is 0)
+  --no-filter-phrase-table ... disallow filtering of phrase tables
+                              (useful if binary phrase tables are available)
+  --random-restarts=INT  ... number of random restarts (default: 20)
+  --predictable-seeds    ... provide predictable seeds to mert so that random
+                             restarts are the same on every run
+  --historic-bests       ... use best settings from all previous iterations
+                             as starting points
+  --range=tm:0..1,-1..1  ... specify min and max value for some features
+                             --range can be repeated as needed.
+                             The order of the various --range specifications
+                             is important only within a feature name.
+                             E.g.:
+                               --range=tm:0..1,-1..1 --range=tm:0..2
+                             is identical to:
+                               --range=tm:0..1,-1..1,0..2
+                             but not to:
+                               --range=tm:0..2 --range=tm:0..1,-1..1
+  --activate-features=STRING  ... comma-separated list of features to optimize,
+                                  others are fixed to the starting values
+                                  default: optimize all features
+                                  example: tm_0,tm_4,d_0
+  --prev-aggregate-nbestlist=INT ... number of previous step to consider when
+                                     loading data (default = $prev_aggregate_nbl_size)
+                                    -1 means all previous, i.e. from iteration 1
+                                     0 means no previous data, i.e. only the
+                                       current iteration
+                                     N means this and N previous iterations
+  --maximum-iterations=ITERS ... Maximum number of iterations. Default: $maximum_iterations
+  --random-directions               ... search only in random directions
+  --number-of-random-directions=int ... number of random directions
+                                        (also works with regular optimizer, default: 0)
+  --pairwise-ranked         ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
+  --pro-starting-point      ... Use PRO to get a starting point for MERT
+  --threads=NUMBER          ... Use multi-threaded mert (must be compiled in).
+  --historic-interpolation=FLOAT
+                            ... Interpolate optimized weights with prior iterations' weight
+                                (parameter sets factor [0;1] given to current weights)
+  --sc-config=STRING     ... extra option to specify multiscoring.
+";
+  exit 1;
+}
+# Check validity of input parameters and set defaults if needed
+print STDERR "Using WORKING_DIR: $___WORKING_DIR\n";
+print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
+# path of script for filtering phrase tables and running the decoder
+# (default location; --filtercmd overrides it)
+$filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;
+# $filtercmd may carry arguments after the script path, so only its first
+# space-separated token is tested for executability.
+my ($filtercmd_executable) = split(/ /, $filtercmd);
+# The filter script is needed only when phrase-table filtering is enabled.
+# The previous test was inverted: it fired only under
+# --no-filter-phrase-table, i.e. exactly when the script is not needed,
+# while suggesting that very option as the remedy.
+if ($___FILTER_PHRASE_TABLE && ! -x $filtercmd_executable) {
+  print STDERR "Filtering command not found: $filtercmd.\n";
+  print STDERR "Use --filtercmd=PATH to specify a valid one or --no-filter-phrase-table\n";
+  exit 1;
+}
+$qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
+$moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
+  if !defined $moses_parallel_cmd;
+if (!defined $mertdir) {
+  $mertdir = "/usr/bin";
+  print STDERR "Assuming --mertdir=$mertdir\n";
+}
+my $mert_extract_cmd = "$mertdir/extractor";
+my $mert_mert_cmd = "$mertdir/mert";
+die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
+die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
+# Location of the megam binary, which is required only when --pairwise-ranked
+# or --pro-starting-point was requested.
+my $pro_optimizer = "$mertdir/megam_i686.opt"; # or set to your installation
+if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optimizer) {
+  # Best-effort automatic installation into $mertdir.
+  # NOTE(review): this downloads a binary over plain HTTP at run time and
+  # marks it executable; the paths are interpolated into the shell commands
+  # unquoted.  Consider installing megam by hand instead.
+  print "did not find $pro_optimizer, installing it in $mertdir\n";
+  `cd $mertdir; wget http://www.cs.utah.edu/~hal/megam/megam_i686.opt.gz;`;
+  `gunzip $pro_optimizer.gz`;
+  `chmod +x $pro_optimizer`;
+  # The exit status of the commands above is not inspected; success is judged
+  # solely by whether the binary is executable afterwards.
+  die("ERROR: Installation of megam_i686.opt failed! Install by hand from http://www.cs.utah.edu/~hal/megam/") unless -x $pro_optimizer;
+}
+# Assemble the argument strings for the extractor and for mert.
+$mertargs = "" if !defined $mertargs;
+# If --mertargs already contains "--scconfig VALUE", lift VALUE out of it
+# (commas turned into spaces so further items can be appended) and remove the
+# option from $mertargs; it is re-added in merged form below.
+my $scconfig = undef;
+if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/){
+  $scconfig=$1;
+  $scconfig =~ s/\,/ /g;
+  $mertargs =~ s/\-\-scconfig\s+(.+?)(\s|$)//;
+}
+# handling reference length strategy: at most one of the three flags may be set
+if (($___CLOSEST + $___AVERAGE + $___SHORTEST) > 1){
+  die "You can specify just ONE reference length strategy (closest or shortest or average) not both\n";
+}
+# no reflen item is added when none of the three flags was given
+if ($___SHORTEST){
+  $scconfig .= " reflen:shortest";
+}elsif ($___AVERAGE){
+  $scconfig .= " reflen:average";
+}elsif ($___CLOSEST){
+  $scconfig .= " reflen:closest";
+}
+# handling case-insensitive flag (a case:... item is always appended)
+if ($___NOCASE) {
+  $scconfig .= " case:false";
+}else{
+  $scconfig .= " case:true";
+}
+# trim, turn the space-separated items back into a comma-separated list and
+# prefix the option name
+$scconfig =~ s/^\s+//;
+$scconfig =~ s/\s+$//;
+$scconfig =~ s/\s+/,/g;
+$scconfig = "--scconfig $scconfig" if ($scconfig);
+# extractor arguments: the shared mert args plus the scorer configuration
+my $mert_extract_args=$mertargs;
+$mert_extract_args .=" $scconfig";
+$mertmertargs = "" if !defined $mertmertargs;
+# mert arguments: shared args plus the mert-only args, with the first
+# -binary / -b switch (any number of leading dashes) removed, followed by the
+# scorer configuration
+my $mert_mert_args="$mertargs $mertmertargs";
+$mert_mert_args =~ s/\-+(binary|b)\b//;
+$mert_mert_args .=" $scconfig";
+# pass the list given with --activate-features on to mert via -o
+if ($___ACTIVATE_FEATURES){ $mert_mert_args .=" -o \"$___ACTIVATE_FEATURES\""; }
+# $filtercmd may carry arguments after the script path; only the first
+# space-separated token is the executable to test.
+my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
+# The filter script is invoked only when phrase-table filtering is enabled,
+# so this check must not abort a run started with --no-filter-phrase-table.
+die "Not executable: $just_cmd_filtercmd" if $___FILTER_PHRASE_TABLE && ! -x $just_cmd_filtercmd;
+die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
+die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
+die "Not executable: $___DECODER" if ! -x $___DECODER;
+my $input_abs = ensure_full_path($___DEV_F);
+die "File not found: $___DEV_F (interpreted as $input_abs)."
+  if ! -e $input_abs;
+$___DEV_F = $input_abs;
+# Option to pass to qsubwrapper and moses-parallel
+my $pass_old_sge = $old_sge ? "-old-sge" : "";
+my $decoder_abs = ensure_full_path($___DECODER);
+die "File not executable: $___DECODER (interpreted as $decoder_abs)."
+  if ! -x $decoder_abs;
+$___DECODER = $decoder_abs;
+my $ref_abs = ensure_full_path($___DEV_E);
+# Collect the reference translation files: either the single file named
+# exactly $ref_abs or, failing that, the numbered series ${ref_abs}0,
+# ${ref_abs}1, ... up to (not including) the first index that does not exist.
+my @references = (-e $ref_abs) ? ($ref_abs) : ();
+unless (@references) {
+  for (my $idx = 0; -e $ref_abs.$idx; $idx++) {
+    push @references, $ref_abs.$idx;
+  }
+  # neither the plain file nor a file with suffix 0 exists
+  die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)") unless @references;
+}
+my $config_abs = ensure_full_path($___CONFIG);
+die "File not found: $___CONFIG (interpreted as $config_abs)."
+  if ! -e $config_abs;
+$___CONFIG = $config_abs;
+# moses must take its models from our config file, so refuse any decoder flag
+# that would substitute a different config or model file.
+die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files."
+  if $___DECODER_FLAGS =~ m{
+       (?:^|\s) -                  # single dash at the start or after whitespace
+       (?: config | f
+         | ttable-file | t
+         | distortion-file
+         | generation-file
+         | lmodel-file
+         | global-lexical-file )
+       [ ]                         # flag name must be followed by a space
+     }x;
+# as weights are normalized in the next steps (by cmert)
+# normalize initial LAMBDAs, too
+my $need_to_normalize = 1;
+#store current directory and create the working directory (if needed)
+my $cwd = `pawd 2>/dev/null`;
+if(!$cwd){$cwd = `pwd`;}
+chomp($cwd);
+mkpath($___WORKING_DIR);
+{
+# open local scope
+#chdir to the working directory
+chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
+# fixed file names
+my $mert_outfile = "mert.out";
+my $mert_logfile = "mert.log";
+my $weights_in_file = "init.opt";
+my $weights_out_file = "weights.txt";
+# set start run
+my $start_run = 1;
+my $bestpoint = undef;
+my $devbleu = undef;
+my $sparse_weights_file = undef;
+my $prev_feature_file = undef;
+my $prev_score_file = undef;
+my $prev_init_file = undef;
+if ($___FILTER_PHRASE_TABLE) {
+  my $outdir = "filtered";
+  if (-e "$outdir/moses.ini") {
+    print STDERR "Assuming the tables are already filtered, reusing $outdir/moses.ini\n";
+  }
+  else {
+    # filter the phrase tables with respect to input, use --decoder-flags
+    print STDERR "filtering the phrase tables... ".`date`;
+    my $___FILTER_F  = $___DEV_F;
+    $___FILTER_F = $filterfile if (defined $filterfile);
+    my $cmd = "$filtercmd ./$outdir $___CONFIG $___FILTER_F";
+    &submit_or_exec($cmd,"filterphrases.out","filterphrases.err");
+  }
+  # make a backup copy of startup ini filepath
+  $___CONFIG_ORIG = $___CONFIG;
+  # the decoder should now use the filtered model
+  $___CONFIG = "$outdir/moses.ini";
+}
+else{
+  # do not filter phrase tables (useful if binary phrase tables are available)
+  # use the original configuration file
+  $___CONFIG_ORIG = $___CONFIG;
+}
+# we run moses to check validity of moses.ini and to obtain all the feature
+# names
+my $featlist = get_featlist_from_moses($___CONFIG);
+$featlist = insert_ranges_to_featlist($featlist, $___RANGES);
+# Mark which features are disabled:
+if (defined $___ACTIVATE_FEATURES) {
+  my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES;
+  my %cnt;
+  for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+    my $name = $featlist->{"names"}->[$i];
+    $cnt{$name} = 0 if !defined $cnt{$name};
+    $featlist->{"enabled"}->[$i] = $enabled{$name."_".$cnt{$name}};
+    $cnt{$name}++;
+  }
+} else {
+  # all enabled
+  for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+    $featlist->{"enabled"}->[$i] = 1;
+  }
+}
+print STDERR "MERT starting values and ranges for random generation:\n";
+for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+  my $name = $featlist->{"names"}->[$i];
+  my $val = $featlist->{"values"}->[$i];
+  my $min = $featlist->{"mins"}->[$i];
+  my $max = $featlist->{"maxs"}->[$i];
+  my $enabled = $featlist->{"enabled"}->[$i];
+  printf STDERR "  %5s = %7.3f", $name, $val;
+  if ($enabled) {
+    printf STDERR " (%5.2f .. %5.2f)\n", $min, $max;
+  } else {
+    print STDERR " --- inactive, not optimized ---\n";
+  }
+}
+if ($continue) {
+  # getting the last finished step
+  print STDERR "Trying to continue an interrupted optimization.\n";
+  open IN, "finished_step.txt" or die "Failed to find the step number, failed to read finished_step.txt";
+  my $step = <IN>;
+  chomp $step;
+  close IN;
+  print STDERR "Last finished step is $step\n";
+  # getting the first needed step
+  my $firststep;
+  if ($prev_aggregate_nbl_size==-1){
+    $firststep=1;
+  }
+  else{
+    $firststep=$step-$prev_aggregate_nbl_size+1;
+    $firststep=($firststep>0)?$firststep:1;
+  }
+#checking if all needed data are available
+  if ($firststep<=$step){
+    print STDERR "First previous needed data index is $firststep\n";
+    print STDERR "Checking whether all needed data (from step $firststep to step $step) are available\n";
+    for (my $prevstep=$firststep; $prevstep<=$step;$prevstep++){
+      print STDERR "Checking whether data of step $prevstep are available\n";
+      if (! -e "run$prevstep.features.dat"){
+	die "Can't start from step $step, because run$prevstep.features.dat was not found!";
+      }else{
+	if (defined $prev_feature_file){
+	  $prev_feature_file = "${prev_feature_file},run$prevstep.features.dat";
+	}
+	else{
+	  $prev_feature_file = "run$prevstep.features.dat";
+	}
+      }
+      if (! -e "run$prevstep.scores.dat"){
+	die "Can't start from step $step, because run$prevstep.scores.dat was not found!";
+      }else{
+	if (defined $prev_score_file){
+	  $prev_score_file = "${prev_score_file},run$prevstep.scores.dat";
+	}
+	else{
+	  $prev_score_file = "run$prevstep.scores.dat";
+	}
+      }
+      if (! -e "run$prevstep.${weights_in_file}"){
+	die "Can't start from step $step, because run$prevstep.${weights_in_file} was not found!";
+      }else{
+        if (defined $prev_init_file){
+          $prev_init_file = "${prev_init_file},run$prevstep.${weights_in_file}";
+        }
+        else{
+          $prev_init_file = "run$prevstep.${weights_in_file}";
+        }
+      }
+    }
+    if (! -e "run$step.weights.txt"){
+      die "Can't start from step $step, because run$step.weights.txt was not found!";
+    }
+    if (! -e "run$step.$mert_logfile"){
+      die "Can't start from step $step, because run$step.$mert_logfile was not found!";
+    }
+    if (! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz"){
+      die "Can't start from step $step, because run$step.best$___N_BEST_LIST_SIZE.out.gz was not found!";
+    }
+    print STDERR "All needed data are available\n";
+    print STDERR "Loading information from last step ($step)\n";
+    my %dummy; # sparse features
+    ($bestpoint,$devbleu) = &get_weights_from_mert("run$step.$mert_outfile","run$step.$mert_logfile",scalar @{$featlist->{"names"}},\%dummy);
+    die "Failed to parse mert.log, missed Best point there."
+      if !defined $bestpoint || !defined $devbleu;
+    print "($step) BEST at $step $bestpoint => $devbleu at ".`date`;
+    my @newweights = split /\s+/, $bestpoint;
+    # Sanity check: order of lambdas must match
+    sanity_check_order_of_lambdas($featlist,
+      "gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");
+    # update my cache of lambda values
+    $featlist->{"values"} = \@newweights;
+  }
+  else{
+    print STDERR "No previous data are needed\n";
+  }
+  $start_run = $step +1;
+}
+###### MERT MAIN LOOP
+my $run=$start_run-1;
+my $oldallsorted = undef;
+my $allsorted = undef;
+my $nbest_file=undef;
+while(1) {
+  $run++;
+  if ($maximum_iterations && $run > $maximum_iterations) {
+      print "Maximum number of iterations exceeded - stopping\n";
+      last;
+  }
+  # run beamdecoder with option to output nbestlists
+  # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists
+  print "run $run start at ".`date`;
+  # In case something dies later, we might wish to have a copy
+  create_config($___CONFIG, "./run$run.moses.ini", $featlist, $run, (defined$devbleu?$devbleu:"--not-estimated--"),$sparse_weights_file);
+  # skip running the decoder if the user wanted
+  if (!$skip_decoder) {
+      print "($run) run decoder to produce n-best lists\n";
+      $nbest_file = run_decoder($featlist, $run, $need_to_normalize);
+      $need_to_normalize = 0;
+      safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
+      $nbest_file = $nbest_file.".gz";
+  }
+  else {
+      $nbest_file="run$run.best$___N_BEST_LIST_SIZE.out.gz";
+      print "skipped decoder run $run\n";
+      $skip_decoder = 0;
+      $need_to_normalize = 0;
+  }
+  # extract score statistics and features from the nbest lists
+  print STDERR "Scoring the nbestlist.\n";
+  my $base_feature_file = "features.dat";
+  my $base_score_file = "scores.dat";
+  my $feature_file = "run$run.${base_feature_file}";
+  my $score_file = "run$run.${base_score_file}";
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+  my $cmd = "";
+  my $scorer_name;
+  my $scorer_weight;
+  $scorer_config=~s/ //g;
+  my @lists_scorer_config=split(",",$scorer_config);
+  $mert_mert_args=$mert_mert_args." --sctype MERGE ";
+  my $scorer_config_spec;
+  foreach $scorer_config_spec(@lists_scorer_config)
+  {
+#     print STDERR $scorer_config_spec."\n";
+    my @lists_scorer_config_spec=split(":",$scorer_config_spec);
+    $scorer_name=$lists_scorer_config_spec[0];
+    $scorer_weight=$lists_scorer_config_spec[1];
+#     print STDERR $scorer_name."\n";
+#     print STDERR $scorer_weight."\n";
+    $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file.$scorer_name --ffile $feature_file.$scorer_name --sctype $scorer_name -r ".join(",", @references)." -n $nbest_file";
+#     print STDERR "LANCEMENT $scorer_name ********************************************\n";
+    &submit_or_exec($cmd,"extract.out.$scorer_name","extract.err.$scorer_name");
+#     print STDERR "FIN $scorer_name ************************************************** \n";
+#   print STDERR "executing $cmd\n";
+#   print STDERR "\n";
+#   safesystem("date");
+#   print STDERR "\n";
+#   if (defined $___JOBS) {
+#     safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=extract.out.$scorer_name -stderr=extract.err.$scorer_name" )
+#       or die "$scorer_name Failed to submit extraction to queue (via $qsubwrapper)";
+#   } else {
+#     safesystem("$cmd > extract.out.$scorer_name 2> extract.err.$scorer_name") or die "$scorer_name Failed to do extraction of statistics.";
+#   }
+#   print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
+  }
+#  print STDERR "CREATION INI\n";
+  my @scorer_content;
+  my @feature_content;
+  my $fileIncrement=0;
+  my $minSizeIncrement=-1;
+  open(FILE,">merge.init") || die ("File creation ERROR : merge.init");
+  foreach $scorer_config_spec(@lists_scorer_config)
+  {
+    my @lists_scorer_config_spec=split(":",$scorer_config_spec);
+    $scorer_name=$lists_scorer_config_spec[0];
+    $scorer_weight=$lists_scorer_config_spec[1];
+    print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
+    my @tmp_load_content=`/bin/cat $score_file.$scorer_name`;
+    my @tmp_load_feat_content=`/bin/cat $feature_file.$scorer_name`;
+    my @tmp_content;
+    my @tmp_feat_content;
+    my $contentIncrement=0;
+    my @tmp_part_content;
+    my $increment_part=0;
+    while ($contentIncrement<scalar(@tmp_load_feat_content))
+    {
+	my $line=$tmp_load_feat_content[$contentIncrement];
+	chomp($line);
+        $line=~s/^[ ]+//g;
+        $line=~s/[ ]+$//g;
+        $line=~s/[ ]+/ /g;
+	push @tmp_part_content,$line;
+	if (rindex($line,"FEATURES_TXT_END")>-1)
+	{
+	   $tmp_feat_content[$increment_part] = [ @tmp_part_content ];
+	   $increment_part++;
+	   @tmp_part_content=();
+	}
+	$contentIncrement++;
+    }
+    $contentIncrement=0;
+    $increment_part=0;
+    @tmp_part_content=();
+    while ($contentIncrement<scalar(@tmp_load_content))
+    {
+	my $line=$tmp_load_content[$contentIncrement];
+	chomp($line);
+        $line=~s/^[ ]+//g;
+        $line=~s/[ ]+$//g;
+        $line=~s/[ ]+/ /g;
+	push @tmp_part_content,$line;
+	if (rindex($line,"SCORES_TXT_END")>-1)
+	{
+	   $tmp_content[$increment_part] = [ @tmp_part_content ];
+	   $increment_part++;
+	   @tmp_part_content=();
+	}
+	$contentIncrement++;
+    }
+    if ($minSizeIncrement<0 || $minSizeIncrement>$increment_part)
+    {
+	$minSizeIncrement=$increment_part;
+    }
+    $scorer_content[$fileIncrement] = [ @tmp_content ];
+    $feature_content[$fileIncrement] = [ @tmp_feat_content ];
+#     if ($fileIncrement==0)
+#     {
+# 	`/bin/cp $feature_file.$scorer_name $feature_file`;
+#     }
+    $fileIncrement++;
+  }
+  close(FILE);
+#   print STDERR "\n";
+#   safesystem("date");
+#   print STDERR "\n";
+#   print STDERR "ON  VA RASSEMBLER dans $score_file\n";
+  open(SCOREFILE,">$score_file") || die ("File creation ERROR : $score_file");
+  open(FEATFILE,">$feature_file") || die ("File creation ERROR : $feature_file");
+  my $newFileIncrement=0;
+  my $contentIncrement=0;
+  my $maxContent=100;
+  my $increment_part=0;
+  my $contentSize=scalar(@{$scorer_content[0]});
+#   print STDERR "TAILLE : ".$contentSize."|".$fileIncrement."|".$minSizeIncrement."\n";
+  while ($increment_part<$minSizeIncrement)
+  {
+    $contentIncrement=0;
+#	print STDERR "increment_part : $increment_part\n";
+    while ($contentIncrement< $maxContent)
+    {
+#      print STDERR "contentIncrement : $contentIncrement\n";
+      my $line="";
+      my $featureLine="";
+      my $createLines=1;
+      $newFileIncrement=0;
+      while($newFileIncrement< $fileIncrement)
+      {
+#	print STDERR "newFileIncrement : $newFileIncrement\n";
+	 if (rindex($scorer_content[$newFileIncrement][$increment_part][$contentIncrement],"BEGIN")<0)
+	 {
+	         if (rindex($line,"SCORES_TXT_END")>-1)
+        	{
+#	            $line=$line;
+#	            chomp($line);
+	         }
+		elsif (rindex($scorer_content[$newFileIncrement][$increment_part][$contentIncrement],"SCORES_TXT_END")>-1)
+     		{
+			$line=$scorer_content[$newFileIncrement][$increment_part][$contentIncrement];
+			$featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
+		}
+	    else
+		{
+			$line=$line." ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement];
+			chomp($line);
+			if (length($featureLine)>0 && rindex($featureLine,$feature_content[$newFileIncrement][$increment_part][$contentIncrement])==0)
+			{
+			    $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
+			    chomp($featureLine);
+			}
+			elsif (length($featureLine)>0)
+			{
+	    # 		$createLines=0;
+			    my @split_line=split(/[\s]+/,$featureLine);
+			    my @split_line_input=split(/[\s]+/,$feature_content[$newFileIncrement][$increment_part][$contentIncrement]);
+			    my $i=0;
+			    $featureLine="";
+			    for ($i=0;$i<scalar(@split_line_input);$i++)
+			    {
+				$split_line_input[$i]=($split_line_input[$i]+$split_line[$i])/2;
+				$featureLine=$featureLine.$split_line_input[$i]." ";
+			    }
+			}
+			elsif (length($featureLine)==0)
+			{
+			    $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
+			    chomp($featureLine);
+			}
+	        }
+	 }
+	else
+	 {
+	    my @split_line_input=split(" ",$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]);
+	    my @split_line_feat_input=split(/[\s]+/,$feature_content[$newFileIncrement][$increment_part][$contentIncrement]);
+	    my @split_line=split(" ",$line);
+	    if (scalar(@split_line)>4)
+	    {
+		$split_line_input[3]=$split_line[3]+$split_line_input[3];
+	    }
+	    if (scalar(@split_line_input)>4)
+	    {
+		if (scalar(@split_line)>4)
+		{
+			if ($split_line[2]<$split_line_input[2])
+			{
+				$split_line_input[2]=$split_line[2];
+			}
+		}
+		else
+		{
+			## Nothing to do
+		}
+		$maxContent=$split_line_input[2]+2;
+#                print STDERR "maxContent : $maxContent : ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]."\n";
+	    }
+	    else
+	{
+		die "scoreFile bad format : ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]."\n";
+	}
+	    $line=$split_line_input[0]." ".$split_line_input[1]." ".$split_line_input[2]." ".$split_line_input[3]." MERGE";
+	    my $i=0;
+	    $featureLine="";
+	    for ($i=0;$i<scalar(@split_line_feat_input);$i++)
+	    {
+# 		$split_line_feat_input[$i]=($split_line_input[$i]+$split_line[$i])/2;
+		if ($i==2)
+		{
+		    $featureLine=$featureLine.$split_line_input[2]." ";
+		}
+		else
+		{
+		    $featureLine=$featureLine.$split_line_feat_input[$i]." ";
+		}
+	    }
+# 	    $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
+	  }
+	 $newFileIncrement++;
+      }
+      $line=~s/^[ ]+//g;
+      $line=~s/[ ]+$//g;
+      $line=~s/[ ]+/ /g;
+#      $line=~s/( SCORES_TXT_END[^!]*)//g;
+#       print STDERR $line."\n";
+#       if ($createLines>0)
+#       {
+	  print SCOREFILE $line."\n";
+	  print FEATFILE $featureLine."\n";
+#       }
+      $contentIncrement++;
+    }
+  $increment_part++;
+  }
+  close(SCOREFILE);
+  close(FEATFILE);
+#   `/bin/cp `
+#   $cmd="$mertdir/mergeWeights -c merge.init -s $score_file -f $feature_file";
+#   print STDERR "executing : $cmd\n";
+#   if (defined $___JOBS) {
+#     safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=mergeWeight.out.MERGE -stderr=mergeWeight.err.MERGE" )
+#       or die "MERGE Failed to submit extraction to queue (via $qsubwrapper)";
+#   } else {
+#     safesystem("$cmd > mergeWeight.out.MERGE 2> mergeWeight.err.MERGE") or die "MERGE Failed to do extraction of statistics.";
+#   }
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+#   my $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
+#   &submit_or_exec($cmd,"extract.out","extract.err");
+  # Create the initial weights file for mert: init.opt
+  my @MIN = @{$featlist->{"mins"}};
+  my @MAX = @{$featlist->{"maxs"}};
+  my @CURR = @{$featlist->{"values"}};
+  my @NAME = @{$featlist->{"names"}};
+  open(OUT,"> $weights_in_file")
+    or die "Can't write $weights_in_file (WD now $___WORKING_DIR)";
+  print OUT join(" ", @CURR)."\n";
+  print OUT join(" ", @MIN)."\n";  # this is where we could pass MINS
+  print OUT join(" ", @MAX)."\n";  # this is where we could pass MAXS
+  close(OUT);
+  # print join(" ", @NAME)."\n";
+  # make a backup copy labelled with this run number
+  safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
+  my $DIM = scalar(@CURR); # number of lambdas
+  # run mert
+  $cmd = "$mert_mert_cmd -d $DIM $mert_mert_args";
+  my $mert_settings = " -n $___RANDOM_RESTARTS";
+  if ($___PREDICTABLE_SEEDS) {
+      my $seed = $run * 1000;
+      $mert_settings .= " -r $seed";
+  }
+  if ($___RANDOM_DIRECTIONS) {
+    if ($___NUM_RANDOM_DIRECTIONS == 0) {
+      $mert_settings .= " -m 50";
+    }
+    $mert_settings .= " -t random-direction";
+  }
+  if ($___NUM_RANDOM_DIRECTIONS) {
+    $mert_settings .= " -m $___NUM_RANDOM_DIRECTIONS";
+  }
+  if ($__THREADS) {
+    $mert_settings .= " --threads $__THREADS";
+  }
+  my $file_settings = "";
+  if (defined $prev_feature_file) {
+    $file_settings .= " --ffile $prev_feature_file,$feature_file";
+  }
+  else{
+    $file_settings .= " --ffile $feature_file";
+  }
+  if (defined $prev_score_file) {
+    $file_settings .= " --scfile $prev_score_file,$score_file";
+  }
+  else{
+    $file_settings .= " --scfile $score_file";
+  }
+  if ($___START_WITH_HISTORIC_BESTS && defined $prev_init_file) {
+    $file_settings .= " --ifile $prev_init_file,run$run.$weights_in_file";
+  }
+  else{
+    $file_settings .= " --ifile run$run.$weights_in_file";
+  }
+  $cmd .= $file_settings;
+  # pro optimization
+  if ($___PAIRWISE_RANKED_OPTIMIZER) {
+    $cmd .= " --pro run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
+    &submit_or_exec($cmd,$mert_outfile,$mert_logfile);
+  }
+  # first pro, then mert
+  elsif ($___PRO_STARTING_POINT) {
+    # run pro...
+    my $pro_cmd = $cmd." --pro run$run.pro.data ; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
+    &submit_or_exec($pro_cmd,"run$run.pro.out","run$run.pro.err");
+    # ... get results ...
+    my %dummy;
+    ($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%dummy);
+    open(PRO_START,">run$run.init.pro");
+    print PRO_START $bestpoint."\n";
+    close(PRO_START);
+    # ... and run mert
+    $cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
+    &submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
+  }
+  # just mert
+  else {
+    &submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
+  }
+  die "Optimization failed, file $weights_out_file does not exist or is empty"
+    if ! -s $weights_out_file;
+  # backup copies
+  foreach my $extractFiles(`/bin/ls extract.*`)
+  {
+    chomp $extractFiles;
+    safesystem ("\\cp -f $extractFiles run$run.$extractFiles") or die;
+  }
+#  safesystem ("\\cp -f extract.err run$run.extract.err") or die;
+#  safesystem ("\\cp -f extract.out run$run.extract.out") or die;
+  safesystem ("\\cp -f $mert_outfile run$run.$mert_outfile") or die;
+  safesystem ("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
+  safesystem ("touch $mert_logfile run$run.$mert_logfile") or die;
+  safesystem ("\\cp -f $weights_out_file run$run.$weights_out_file") or die; # this one is needed for restarts, too
+  print "run $run end at ".`date`;
+  my %sparse_weights; # sparse features
+  ($bestpoint,$devbleu) = &get_weights_from_mert("run$run.$mert_outfile","run$run.$mert_logfile",scalar @{$featlist->{"names"}},\%sparse_weights);
+  die "Failed to parse mert.log, missed Best point there."
+    if !defined $bestpoint || !defined $devbleu;
+  print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;
+  # update my cache of lambda values
+  my @newweights = split /\s+/, $bestpoint;
+  # interpolate with the previous iteration's weights, if historic-interpolation is specified
+  if ($___HISTORIC_INTERPOLATION>0 && $run>3) {
+    my %historic_sparse_weights;
+    if (-e "run$run.sparse-weights") {
+      open(SPARSE,"run$run.sparse-weights");
+      while(<SPARSE>) {
+        chop;
+        my ($feature,$weight) = split;
+        $historic_sparse_weights{$feature} = $weight;
+      }
+    }
+    my $prev = $run-1;
+    my @historic_weights = split /\s+/, `cat run$prev.$weights_out_file`;
+    for(my $i=0;$i<scalar(@newweights);$i++) {
+      $newweights[$i] = $___HISTORIC_INTERPOLATION * $newweights[$i] + (1-$___HISTORIC_INTERPOLATION) * $historic_weights[$i];
+    }
+    print "interpolate with ".join(",",@historic_weights)." to ".join(",",@newweights);
+    foreach (keys %sparse_weights) {
+      $sparse_weights{$_} *= $___HISTORIC_INTERPOLATION;
+      #print STDERR "sparse_weights{$_} *= $___HISTORIC_INTERPOLATION -> $sparse_weights{$_}\n";
+    }
+    foreach (keys %historic_sparse_weights) {
+      $sparse_weights{$_} += (1-$___HISTORIC_INTERPOLATION) * $historic_sparse_weights{$_};
+      #print STDERR "sparse_weights{$_} += (1-$___HISTORIC_INTERPOLATION) * $historic_sparse_weights{$_} -> $sparse_weights{$_}\n";
+    }
+  }
+  if ($___HISTORIC_INTERPOLATION>0) {
+    open(WEIGHTS,">run$run.$weights_out_file");
+    print WEIGHTS join(" ",@newweights);
+    close(WEIGHTS);
+  }
+  $featlist->{"values"} = \@newweights;
+  if (scalar keys %sparse_weights) {
+    $sparse_weights_file = "run".($run+1).".sparse-weights";
+    open(SPARSE,">".$sparse_weights_file);
+    foreach my $feature (keys %sparse_weights) {
+      print SPARSE "$feature $sparse_weights{$feature}\n";
+    }
+    close(SPARSE);
+  }
+  ## additional stopping criterion: weights have not changed
+  my $shouldstop = 1;
+  for(my $i=0; $i<@CURR; $i++) {
+    die "Lost weight! mert reported fewer weights (@newweights) than we gave it (@CURR)"
+      if !defined $newweights[$i];
+    if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {
+      $shouldstop = 0;
+      last;
+    }
+  }
+  open F, "> finished_step.txt" or die "Can't mark finished step";
+  print F $run."\n";
+  close F;
+  if ($shouldstop) {
+    print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
+    last;
+  }
+  my $firstrun;
+  if ($prev_aggregate_nbl_size==-1){
+    $firstrun=1;
+  }
+  else{
+    $firstrun=$run-$prev_aggregate_nbl_size+1;
+    $firstrun=($firstrun>0)?$firstrun:1;
+  }
+  print "loading data from $firstrun to $run (prev_aggregate_nbl_size=$prev_aggregate_nbl_size)\n";
+  $prev_feature_file = undef;
+  $prev_score_file = undef;
+  $prev_init_file = undef;
+  for (my $i=$firstrun;$i<=$run;$i++){
+    if (defined $prev_feature_file){
+      $prev_feature_file = "${prev_feature_file},run${i}.${base_feature_file}";
+    }
+    else{
+      $prev_feature_file = "run${i}.${base_feature_file}";
+    }
+    if (defined $prev_score_file){
+      $prev_score_file = "${prev_score_file},run${i}.${base_score_file}";
+    }
+    else{
+      $prev_score_file = "run${i}.${base_score_file}";
+    }
+    if (defined $prev_init_file){
+      $prev_init_file = "${prev_init_file},run${i}.${weights_in_file}";
+    }
+    else{
+      $prev_init_file = "run${i}.${weights_in_file}";
+    }
+  }
+  print "loading data from $prev_feature_file\n" if defined($prev_feature_file);
+  print "loading data from $prev_score_file\n" if defined($prev_score_file);
+  print "loading data from $prev_init_file\n" if defined($prev_init_file);
+}
+print "Training finished at ".`date`;
+if (defined $allsorted){ safesystem ("\\rm -f $allsorted") or die; };
+safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
+safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
+create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu);
+# just to be sure that we have the really last finished step marked
+open F, "> finished_step.txt" or die "Can't mark finished step";
+print F $run."\n";
+close F;
+#chdir back to the original directory # useless, just to remind we were not there
+chdir($cwd);
+} # end of local scope
+sub get_weights_from_mert {
+  # Recover the optimizer's result for one iteration.
+  #   $outfile        - optimizer stdout; parsed when the result comes from PRO
+  #   $logfile        - optimizer log; searched for "Best point:" otherwise
+  #   $weight_count   - number of dense feature weights expected
+  #   $sparse_weights - hash ref that receives sparse feature weights (PRO only)
+  # Returns ($bestpoint, $devbleu): the space-separated weight string and the
+  # dev score ("unknown" for PRO; both undef if no "Best point:" line exists).
+  my ($outfile,$logfile,$weight_count,$sparse_weights) = @_;
+  my $parse_pro_output = $___PAIRWISE_RANKED_OPTIMIZER
+    || ($___PRO_STARTING_POINT && $logfile =~ /pro/);
+  if (!$parse_pro_output) {
+    # MERT proper: the first "Best point: w1 w2 ... => score" line wins
+    my ($point,$score);
+    open(IN,$logfile) or die "Can't open $logfile";
+    while (<IN>) {
+      next unless /Best point:\s*([\s\d\.\-e]+?)\s*=> ([\-\d\.]+)/;
+      ($point,$score) = ($1,$2);
+      last;
+    }
+    close IN;
+    return ($point,$score);
+  }
+  # PRO output: "F<index> <weight>" lines carry dense features, lines whose
+  # name contains an underscore carry sparse features
+  open(IN,$outfile) or die "Can't open $outfile";
+  my @dense = (0) x $weight_count;
+  my $abs_total;
+  while (<IN>) {
+    if (my ($index,$value) = /^F(\d+) ([\-\.\de]+)/) {
+      $dense[$index] = $value;
+      $abs_total += abs($value);
+    }
+    elsif (my ($feature,$weight) = /^(.+_.+) ([\-\.\de]+)/) {
+      $sparse_weights->{$feature} = $weight;
+    }
+  }
+  close IN;
+  # scale dense and sparse weights by the summed absolute dense weights
+  $_ /= $abs_total for @dense;
+  $sparse_weights->{$_} /= $abs_total for keys %{$sparse_weights};
+  return (join(" ",@dense), "unknown");
+}
+sub run_decoder {
+    # Decode the dev set with the current weights and return the name of the
+    # n-best list file that the decoder was asked to write.
+    #   $featlist          - hash ref; "names" and "values" are parallel arrays
+    #   $run               - iteration number, used in the output file names
+    #   $need_to_normalize - if true, scale the weights to unit L1 norm first
+    my ($featlist, $run, $need_to_normalize) = @_;
+    my $filename = sprintf("run%d.best$___N_BEST_LIST_SIZE.out", $run);
+    # user-supplied parameters
+    print "params = $___DECODER_FLAGS\n";
+    # parameters to set all model weights (to override moses.ini);
+    # @vals is a copy, so normalizing does not touch the feature list itself
+    my @vals = @{$featlist->{"values"}};
+    if ($need_to_normalize) {
+      print STDERR "Normalizing lambdas: @vals\n";
+      my $totlambda = 0;
+      $totlambda += abs($_) foreach @vals;
+      $_ /= $totlambda foreach @vals;
+    }
+    # moses now does not seem accept "-tm X -tm Y" but needs "-tm X Y",
+    # so all values of one feature name are gathered behind a single switch
+    my %model_weights;
+    foreach my $i (0 .. $#{$featlist->{"names"}}) {
+      my $name = $featlist->{"names"}->[$i];
+      $model_weights{$name} = "-$name" unless defined $model_weights{$name};
+      $model_weights{$name} .= sprintf(" %.6f", $vals[$i]);
+    }
+    my $decoder_config = join(" ", values %model_weights);
+    print STDERR "DECODER_CFG = $decoder_config\n";
+    print "decoder_config = $decoder_config\n";
+    # run the decoder, through the parallel wrapper when jobs were requested
+    my $nBest_cmd = "-n-best-size $___N_BEST_LIST_SIZE";
+    my $use_parallel = (defined $___JOBS && $___JOBS > 0);
+    my $decoder_cmd = $use_parallel
+      ? "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out"
+      : "$___DECODER $___DECODER_FLAGS  -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out";
+    safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";
+    # make sure the decoder reports its scores in the order we expect
+    sanity_check_order_of_lambdas($featlist, $filename);
+    return $filename;
+}
+sub insert_ranges_to_featlist {
+  # Attach optimization ranges to a feature list.
+  #   $featlist - hash ref with a "names" array; gains parallel "mins" and
+  #               "maxs" arrays
+  #   $ranges   - array ref of --range values (may be undef), each of the form
+  #               "name:min..max[,min..max...]"; a name stays in effect for
+  #               later unnamed pairs within the same value
+  # Features without a (remaining) range get the default 0.0 .. 1.0.
+  # Dies on an unknown name, a missing name, or a non-numeric bound.
+  # Returns $featlist.
+  my $featlist = shift;
+  my $ranges = shift;
+  $ranges = [] if !defined $ranges;
+  my $number = qr/^-?[0-9.]+$/;
+  # first collect the ranges from options, per feature name, in the order given
+  my %niceranges;
+  foreach my $range (@$ranges) {
+    my $name = undef;
+    foreach my $namedpair (split /,/, $range) {
+      if ($namedpair =~ /^(.*?):/) {
+        $name = $1;
+        $namedpair =~ s/^.*?://;
+        die "Unrecognized name '$name' in --range=$range"
+          if !defined $ABBR2FULL{$name};
+      }
+      my ($min, $max) = split /\.\./, $namedpair;
+      die "Bad min '".(defined $min ? $min : "")."' in --range=$range"
+        if !defined $min || $min !~ $number;
+      # validate $max itself; the check used to re-test $min, so a malformed
+      # or missing upper bound was silently accepted
+      die "Bad max '".(defined $max ? $max : "")."' in --range=$range"
+        if !defined $max || $max !~ $number;
+      die "No name given in --range=$range" if !defined $name;
+      push @{$niceranges{$name}}, [$min, $max];
+    }
+  }
+  # now populate featlist: the n-th occurrence of a name consumes the n-th
+  # range given for that name
+  for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+    my $name = $featlist->{"names"}->[$i];
+    my $min = 0.0;
+    my $max = 1.0;
+    if (defined $niceranges{$name}) {
+      my $minmax = shift @{$niceranges{$name}};
+      ($min, $max) = @$minmax if defined $minmax;
+    }
+    $featlist->{"mins"}->[$i] = $min;
+    $featlist->{"maxs"}->[$i] = $max;
+  }
+  return $featlist;
+}
+sub sanity_check_order_of_lambdas {
+  # Die unless the score labels found in the n-best list ($filename_or_stream)
+  # match, in order, the feature names in $featlist->{"names"}.
+  my ($featlist, $filename_or_stream) = @_;
+  my @expected_lambdas = @{$featlist->{"names"}};
+  my @got = get_order_of_scores_from_nbestlist($filename_or_stream);
+  # comparing the space-joined lists checks both the labels and their order
+  unless ("@got" eq "@expected_lambdas") {
+    die "Mismatched lambdas. Decoder returned @got, we expected @expected_lambdas";
+  }
+}
+sub get_featlist_from_moses {
+  # Run moses with the given config file and return the list of features and
+  # their initial values.
+  #   $configfn - moses config file passed to the decoder via -config
+  # The decoder output is cached in ./features.list and reused if that file
+  # already exists. Each line must look like "<long name> <feature> <value>";
+  # lines whose value is "sparse" are skipped.
+  # Returns a hash ref {"names"=>[...], "values"=>[...]}. Exits with status 1
+  # after reporting all bad values / unknown features; dies on malformed lines.
+  my $configfn = shift;
+  my $featlistfn = "./features.list";
+  if (-e $featlistfn) {
+    print STDERR "Using cached features list: $featlistfn\n";
+  } else {
+    # report the config that is actually handed to the decoder below
+    print STDERR "Asking moses for feature names and values from $configfn\n";
+    my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn  -inputtype $___INPUTTYPE -show-weights > $featlistfn";
+    safesystem($cmd) or die "Failed to run moses with the config $configfn";
+  }
+  # read feature list
+  my @names = ();
+  my @startvalues = ();
+  open(INI,$featlistfn) or die "Can't read $featlistfn";
+  my $nr = 0;
+  my @errs = ();
+  while (<INI>) {
+    $nr++;
+    chomp;
+    /^(.+) (\S+) (\S+)$/ || die("invalid feature: $_");
+    my ($longname, $feature, $value) = ($1,$2,$3);
+    next if $value eq "sparse";
+    # accept a sign inside the number too, so that values written in
+    # scientific notation (e.g. 1e-05) are not rejected; this mirrors the
+    # score pattern used when parsing n-best lists
+    push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
+      if $value !~ /^[+-]?[-+0-9.e]+$/;
+    push @errs, "$featlistfn:$nr:Unknown feature '$feature', please add it to \@ABBR_FULL_MAP\n"
+      if !defined $ABBR2FULL{$feature};
+    push @names, $feature;
+    push @startvalues, $value;
+  }
+  close INI;
+  # report every problem at once rather than stopping at the first one
+  if (scalar @errs) {
+    print STDERR join("", @errs);
+    exit 1;
+  }
+  return {"names"=>\@names, "values"=>\@startvalues};
+}
+sub get_order_of_scores_from_nbestlist {
+  # Peek at the first line of an n-best list and return the labels of its
+  # dense scores, one label per score, in the order they appear.
+  # The scores column (third |||-separated field) looks like
+  #   label: num num num label2: num
+  # Labels containing an underscore are treated as sparse features and the
+  # score that follows them is ignored.
+  my $fname_or_source = shift;
+  open IN, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source'";
+  my $line = <IN>;
+  close IN;
+  die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
+  # only the third field is needed; the first two are discarded
+  my (undef, undef, $scores) = split /\|\|\|/, $line;
+  $scores =~ s/^\s*|\s*$//g;
+  die "No scores in line: $line" if $scores eq "";
+  my @order = ();
+  my $current_label = undef;
+  my $after_sparse_label = 0; # we ignore sparse features here
+  foreach my $field (split /\s+/, $scores) {
+    if ($field =~ /.+_.+:/) {
+      $after_sparse_label = 1;
+      next;
+    }
+    if ($field =~ /^([a-z][0-9a-z]*):/i) {
+      $current_label = $1;
+      next;
+    }
+    die "Not a label, not a score '$field'. Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'"
+      unless $field =~ /^-?[-0-9.e]+$/;
+    # a numeric token: it is remembered unless it belongs to a sparse feature
+    unless ($after_sparse_label) {
+      die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
+        if !defined $current_label;
+      push @order, $current_label;
+    }
+    $after_sparse_label = 0;
+  }
+  print STDERR "The decoder returns the scores in this order: @order\n";
+  return @order;
+}
+sub create_config {
+    # Write a decoder config to $outfn by cloning $infn and overriding values.
+    # Overrides come from two places: the switches in $___DECODER_FLAGS and
+    # the weights in $featlist (which replace any weights given as switches).
+    # Sections of $infn that are overridden get the new values; all other
+    # sections are copied unchanged; overrides with no section in $infn are
+    # appended at the end. A "config" override is never written.
+    # Note: the first line of $infn is consumed before the copy loop echoes
+    # anything, so it is only reproduced if it is a [section] header.
+    my $infn = shift; # source config
+    my $outfn = shift; # where to save the config
+    my $featlist = shift; # the lambdas we should write
+    my $iteration = shift;  # just for verbosity
+    my $bleu_achieved = shift; # just for verbosity
+    my $sparse_weights_file = shift; # only defined when optimizing sparse features
+    my %P; # the hash of all parameters we wish to override
+    # first convert the command line parameters to the hash
+    { # ensure local scope of vars
+        my $parameter=undef;
+        print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
+        # /g is needed on both substitutions: without it only the first match
+        # is handled (leading OR trailing blanks, first run of blanks), and
+        # the split below then yields empty tokens that end up as bogus
+        # parameter values. The global is normalized in place, as before.
+        $___DECODER_FLAGS =~ s/^\s*|\s*$//g;
+        $___DECODER_FLAGS =~ s/\s+/ /g;
+        foreach (split(/ /,$___DECODER_FLAGS)) {
+            # a token starting with "-" followed by a non-digit is a switch;
+            # anything else (including negative numbers) is a value
+            if (/^\-([^\d].*)$/) {
+                $parameter = $1;
+                $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
+            }
+            else {
+                die "Found value with no -paramname before it: $_"
+                  if !defined $parameter;
+                push @{$P{$parameter}},$_;
+            }
+        }
+    }
+    # First delete all weights params from the input, we're overwriting them.
+    # Delete both short and long-named version.
+    for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+      my $name = $featlist->{"names"}->[$i];
+      delete($P{$name});
+      delete($P{$ABBR2FULL{$name}}) if defined $ABBR2FULL{$name};
+    }
+    # Convert weights to elements in P
+    for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
+      my $name = $featlist->{"names"}->[$i];
+      my $val = $featlist->{"values"}->[$i];
+      $name = defined $ABBR2FULL{$name} ? $ABBR2FULL{$name} : $name;
+        # ensure long name
+      push @{$P{$name}}, $val;
+    }
+    if (defined($sparse_weights_file)) {
+      push @{$P{"weights-file"}}, $___WORKING_DIR."/".$sparse_weights_file;
+    }
+    # create new moses.ini decoder config file by cloning and overriding the original one
+    open(INI,$infn) or die "Can't read $infn";
+    delete($P{"config"}); # never output
+    print "Saving new config to: $outfn\n";
+    open(OUT,"> $outfn") or die "Can't write $outfn";
+    print OUT "# MERT optimized configuration\n";
+    print OUT "# decoder $___DECODER\n";
+    print OUT "# BLEU $bleu_achieved on dev $___DEV_F\n";
+    print OUT "# We were before running iteration $iteration\n";
+    print OUT "# finished ".`date`;
+    my $line = <INI>;
+    while(1) {
+        last unless $line;
+        # skip until hit [parameter], echoing only comments and blank lines
+        if ($line !~ /^\[(.+)\]\s*$/) {
+            $line = <INI>;
+            # $line is undef at end of file; the loop head then terminates
+            print OUT $line if defined $line && ($line =~ /^\#/ || $line =~ /^\s+$/);
+            next;
+        }
+        # parameter name
+        my $parameter = $1;
+        $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
+        print OUT "[$parameter]\n";
+        # change parameter, if new values
+        if (defined($P{$parameter})) {
+            # write new values
+            foreach (@{$P{$parameter}}) {
+                print OUT $_."\n";
+            }
+            # remove it so that it is not appended again below
+            delete($P{$parameter});
+            # skip until new parameter, only write comments
+            while($line = <INI>) {
+                print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
+                last if $line =~ /^\[/;
+                last unless $line;
+            }
+            next;
+        }
+        # unchanged parameter, write old
+        while($line = <INI>) {
+            last if $line =~ /^\[/;
+            print OUT $line;
+        }
+    }
+    # write all additional parameters
+    foreach my $parameter (keys %P) {
+        print OUT "\n[$parameter]\n";
+        foreach (@{$P{$parameter}}) {
+            print OUT $_."\n";
+        }
+    }
+    close(INI);
+    close(OUT);
+    print STDERR "Saved: $outfn\n";
+}
+sub safesystem {
+  # Run a command through system(), logging it to STDERR first.
+  # Returns true if the command exited with status 0 and false otherwise.
+  # The whole script exits with status 1 if the command could not be started
+  # or was terminated by a signal.
+  print STDERR "Executing: @_\n";
+  system(@_);
+  my $status = $?;
+  # -1: the command could not be executed at all
+  if ($status == -1) {
+      print STDERR "Failed to execute: @_\n  $!\n";
+      exit(1);
+  }
+  # low 7 bits: signal number; bit 8: core dump flag
+  my $signal = $status & 127;
+  if ($signal) {
+      printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
+          $signal, ($status & 128) ? 'with' : 'without';
+      exit(1);
+  }
+  # high byte: the command's own exit code
+  my $exitcode = $status >> 8;
+  print STDERR "Exit code: $exitcode\n" if $exitcode;
+  return ! $exitcode;
+}
+sub ensure_full_path {
+    # Turn a path into an absolute one.
+    # The first "/nfsmnt" is stripped; a path that is then absolute is
+    # returned as is. Otherwise the current directory (from `pawd`, falling
+    # back to `pwd`) is prepended and "/./", repeated slashes, "dir/../"
+    # pairs and trailing slashes are cleaned up.
+    my $PATH = shift;
+    $PATH =~ s{/nfsmnt}{};
+    return $PATH if $PATH =~ m{^/};
+    my $cwd = `pawd 2>/dev/null`;
+    $cwd ||= `pwd`;
+    chomp($cwd);
+    $PATH = $cwd."/".$PATH;
+    $PATH =~ s{[\r\n]}{}g;
+    $PATH =~ s{/\./}{/}g;
+    $PATH =~ s{/+}{/}g;
+    # collapse "dir/../" pairs; the pass counter caps the loop at ten rounds
+    for (my $pass = 0; $PATH =~ m{/\.\./} && $pass < 10; $pass++) {
+        $PATH =~ s{/+}{/}g;
+        $PATH =~ s{/[^/]+/\.\./}{/}g;
+    }
+    $PATH =~ s{/[^/]+/\.\.$}{};
+    $PATH =~ s{/+$}{};
+    $PATH =~ s{/nfsmnt}{};
+    return $PATH;
+}
+sub submit_or_exec {
+  # Run $cmd with its output captured in the files $stdout and $stderr:
+  # through the qsub wrapper when a positive number of jobs is configured,
+  # directly through the shell otherwise. Dies if the command fails.
+  my ($cmd,$stdout,$stderr) = @_;
+  print STDERR "exec: $cmd\n";
+  my ($invocation, $complaint);
+  if (defined $___JOBS && $___JOBS > 0) {
+    $invocation = "$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=$stdout -stderr=$stderr";
+    $complaint = "ERROR: Failed to submit '$cmd' (via $qsubwrapper)";
+  }
+  else {
+    $invocation = "$cmd > $stdout 2> $stderr";
+    $complaint = "ERROR: Failed to run '$cmd'.";
+  }
+  safesystem($invocation) or die $complaint;
+}

mosesdecoder/contrib/relent-filter/src/IOWrapper.cpp ADDED Viewed

	@@ -0,0 +1,580 @@

+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2006 University of Edinburgh
+All rights reserved.
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+			this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+			this list of conditions and the following disclaimer in the documentation
+			and/or other materials provided with the distribution.
+    * Neither the name of the University of Edinburgh nor the names of its contributors
+			may be used to endorse or promote products derived from this software
+			without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+// example file on how to use moses library
+#include <iostream>
+#include <stack>
+#include "TypeDef.h"
+#include "Util.h"
+#include "IOWrapper.h"
+#include "Hypothesis.h"
+#include "WordsRange.h"
+#include "TrellisPathList.h"
+#include "StaticData.h"
+#include "DummyScoreProducers.h"
+#include "InputFileStream.h"
+using namespace std;
+using namespace Moses;
+namespace MosesCmd
+{
+IOWrapper::IOWrapper(  // Overload without an input file path: input is read from std::cin.
+  const vector<FactorType>				&inputFactorOrder
+  , const vector<FactorType>			&outputFactorOrder
+  , const FactorMask							&inputFactorUsed
+  , size_t												nBestSize
+  , const string									&nBestFilePath)
+  :m_inputFactorOrder(inputFactorOrder)
+  ,m_outputFactorOrder(outputFactorOrder)
+  ,m_inputFactorUsed(inputFactorUsed)
+  ,m_inputFile(NULL)  // no InputFileStream is allocated by this overload
+  ,m_inputStream(&std::cin)
+  ,m_nBestStream(NULL)  // every output stream pointer starts out NULL
+  ,m_outputWordGraphStream(NULL)
+  ,m_outputSearchGraphStream(NULL)
+  ,m_detailedTranslationReportingStream(NULL)
+  ,m_alignmentOutputStream(NULL)
+{
+  Initialization(inputFactorOrder, outputFactorOrder  // defined outside this view; it receives the n-best size and path
+                 , inputFactorUsed
+                 , nBestSize, nBestFilePath);
+}
+/**
+ * Construct an IOWrapper that reads its input from a file.
+ *
+ * An InputFileStream is allocated for inputFilePath and owned by this object
+ * (it is deleted in the destructor). Every optional output stream starts out
+ * NULL; Initialization() opens the ones that are requested.
+ *
+ * @param inputFactorOrder  stored in m_inputFactorOrder
+ * @param outputFactorOrder stored in m_outputFactorOrder
+ * @param inputFactorUsed   stored in m_inputFactorUsed
+ * @param nBestSize         forwarded to Initialization()
+ * @param nBestFilePath     forwarded to Initialization()
+ * @param inputFilePath     path handed to the InputFileStream constructor
+ */
+IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder,
+                     const std::vector<FactorType> &outputFactorOrder,
+                     const FactorMask &inputFactorUsed,
+                     size_t nBestSize,
+                     const std::string &nBestFilePath,
+                     const std::string &inputFilePath)
+  : m_inputFactorOrder(inputFactorOrder),
+    m_outputFactorOrder(outputFactorOrder),
+    m_inputFactorUsed(inputFactorUsed),
+    m_inputFilePath(inputFilePath),
+    m_inputFile(new InputFileStream(inputFilePath)),
+    m_nBestStream(NULL),
+    m_outputWordGraphStream(NULL),
+    m_outputSearchGraphStream(NULL),
+    m_detailedTranslationReportingStream(NULL),
+    m_alignmentOutputStream(NULL)
+{
+  Initialization(inputFactorOrder, outputFactorOrder, inputFactorUsed,
+                 nBestSize, nBestFilePath);
+  // m_inputStream is not in the initializer list; point it at the file here
+  m_inputStream = m_inputFile;
+}
+/**
+ * Release every stream this wrapper allocated.
+ *
+ * delete on a null pointer is a no-op, so members that were never opened
+ * need no explicit NULL test.
+ */
+IOWrapper::~IOWrapper()
+{
+  delete m_inputFile;
+  // When single-best output is suppressed, Initialization() pointed
+  // m_nBestStream at std::cout, which must not be deleted. Otherwise it is
+  // either NULL or an ofstream allocated by Initialization().
+  if (!m_surpressSingleBestOutput) {
+    delete m_nBestStream;
+  }
+  delete m_outputWordGraphStream;
+  delete m_outputSearchGraphStream;
+  delete m_detailedTranslationReportingStream;
+  delete m_alignmentOutputStream;
+}
+/**
+ * Open the optional output streams selected by the arguments and by the
+ * global StaticData configuration.
+ *
+ * The first three parameters are accepted but unused.
+ *
+ * @param nBestSize      when 0, no n-best stream is set up
+ * @param nBestFilePath  "-" or "/dev/stdout" sends the n-best list to
+ *                       std::cout and suppresses the single-best output;
+ *                       any other value is opened as a file
+ */
+void IOWrapper::Initialization(const std::vector<FactorType> & /*inputFactorOrder*/,
+                               const std::vector<FactorType> & /*outputFactorOrder*/,
+                               const FactorMask & /*inputFactorUsed*/,
+                               size_t nBestSize,
+                               const std::string &nBestFilePath)
+{
+  const StaticData &config = StaticData::Instance();
+
+  // n-best list
+  m_surpressSingleBestOutput = false;
+  if (nBestSize > 0) {
+    const bool nBestToStdout = (nBestFilePath == "-" || nBestFilePath == "/dev/stdout");
+    if (nBestToStdout) {
+      m_surpressSingleBestOutput = true;
+      m_nBestStream = &std::cout;
+    } else {
+      m_nBestStream = new std::ofstream(nBestFilePath.c_str());
+    }
+  }
+
+  // word graph
+  if (config.GetOutputWordGraph()) {
+    const string wordGraphPath = config.GetParam("output-word-graph")[0];
+    m_outputWordGraphStream = new std::ofstream(wordGraphPath.c_str());
+  }
+
+  // search graph (the extended variant takes its path from its own parameter)
+  if (config.GetOutputSearchGraph()) {
+    string searchGraphPath;
+    if (!config.GetOutputSearchGraphExtended()) {
+      searchGraphPath = config.GetParam("output-search-graph")[0];
+    } else {
+      searchGraphPath = config.GetParam("output-search-graph-extended")[0];
+    }
+    m_outputSearchGraphStream = new std::ofstream(searchGraphPath.c_str());
+  }
+
+  // detailed translation report; the stream must be usable
+  if (config.IsDetailedTranslationReportingEnabled()) {
+    const std::string &reportPath = config.GetDetailedTranslationReportingFilePath();
+    m_detailedTranslationReportingStream = new std::ofstream(reportPath.c_str());
+    CHECK(m_detailedTranslationReportingStream->good());
+  }
+
+  // sentence alignment output; the stream must be usable
+  if (!config.GetAlignmentOutputFile().empty()) {
+    m_alignmentOutputStream = new ofstream(config.GetAlignmentOutputFile().c_str());
+    CHECK(m_alignmentOutputStream->good());
+  }
+}
+/**
+ * Read the next input from m_inputStream into inputType.
+ *
+ * If the input carries a non-zero translation id, the internal counter is
+ * moved past it when necessary; otherwise the input is given the next
+ * counter value.
+ *
+ * @param inputType  object to fill; it is deleted here when reading fails
+ * @return inputType on success, NULL when Read() reports failure
+ */
+InputType*IOWrapper::GetInput(InputType* inputType)
+{
+  if (!inputType->Read(*m_inputStream, m_inputFactorOrder)) {
+    delete inputType;
+    return NULL;
+  }
+
+  const long explicitId = inputType->GetTranslationId();
+  if (explicitId == 0) {
+    inputType->SetTranslationId(m_translationId++);
+  } else if (explicitId >= m_translationId) {
+    m_translationId = explicitId + 1;
+  }
+  return inputType;
+}
+/***
+ * Print the target phrase of one hypothesis edge.
+ *
+ * With reportAllFactors the phrase is streamed as a whole. Otherwise each
+ * word is written as the factors listed in outputFactorOrder, joined by "|",
+ * and followed by a single space.
+ *
+ * @param out                 destination stream
+ * @param edge                hypothesis whose current target phrase is printed
+ * @param outputFactorOrder   factors to print per word; must not be empty
+ * @param reportSegmentation  append "|start-end| " of the source range
+ *                            (trace option "-t") when the phrase is non-empty
+ * @param reportAllFactors    print the phrase with all of its factors
+ */
+void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+		   bool reportSegmentation, bool reportAllFactors)
+{
+  CHECK(outputFactorOrder.size() > 0);
+  const Phrase& phrase = edge.GetCurrTargetPhrase();
+  if (reportAllFactors == true) {
+    out << phrase;
+  } else {
+    size_t size = phrase.GetSize();
+    for (size_t pos = 0 ; pos < size ; pos++) {
+      for (size_t i = 0 ; i < outputFactorOrder.size() ; i++) {
+        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+        // check before dereferencing: a missing factor must trip the CHECK
+        // rather than be streamed through a null pointer
+        CHECK(factor);
+        if (i > 0) {
+          out << "|";
+        }
+        out << *factor;
+      }
+      out << " ";
+    }
+  }
+  // trace option "-t"
+  if (reportSegmentation == true && phrase.GetSize() > 0) {
+    out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
+	<< "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
+  }
+}
+/**
+ * Print the surface of a whole hypothesis chain.
+ *
+ * Follows the GetPrevHypo() links recursively so that the earliest
+ * hypothesis is printed first and hypo itself last. A NULL hypo prints
+ * nothing and ends the recursion.
+ */
+void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
+                   bool reportSegmentation, bool reportAllFactors)
+{
+  if (hypo == NULL) {
+    return;
+  }
+  // predecessors first, then this edge
+  OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
+  OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
+}
+/**
+ * Print the alignment points of one AlignmentInfo as "src-tgt " pairs.
+ *
+ * The points come from GetSortedAlignments(); sourceOffset and targetOffset
+ * are added to the respective positions before printing.
+ */
+void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
+{
+  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+  AlignVec sortedPoints = ai.GetSortedAlignments();
+  for (AlignVec::const_iterator point = sortedPoints.begin(); point != sortedPoints.end(); ++point) {
+    const size_t sourcePos = (*point)->first + sourceOffset;
+    const size_t targetPos = (*point)->second + targetOffset;
+    out << sourcePos << "-" << targetPos << " ";
+  }
+}
+/**
+ * Print the word alignment of a sequence of edges on one line.
+ *
+ * The edges are visited from the last element to the first. Each edge's
+ * alignment is shifted by the start of its source range and by the number
+ * of target words printed so far. The line is terminated with std::endl.
+ */
+void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
+{
+  typedef vector<const Hypothesis *>::const_reverse_iterator EdgeIter;
+  size_t targetOffset = 0;
+  for (EdgeIter it = edges.rbegin(); it != edges.rend(); ++it) {
+    const Hypothesis &edge = **it;
+    const TargetPhrase &targetPhrase = edge.GetCurrTargetPhrase();
+    const size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+    OutputAlignment(out, targetPhrase.GetAlignmentInfo(), sourceOffset, targetOffset);
+    targetOffset += targetPhrase.GetSize();
+  }
+  out << std::endl;
+}
+/**
+ * Format the alignment of edges into a string and hand it to the collector
+ * under line number lineNo.
+ *
+ * collector is dereferenced without a NULL test; the overloads in this file
+ * that call this one test it first.
+ */
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
+{
+  ostringstream buffer;
+  OutputAlignment(buffer, edges);
+  collector->Write(lineNo, buffer.str());
+}
+/**
+ * Write the alignment of the hypothesis chain ending in hypo to the
+ * collector. Does nothing when collector is NULL.
+ *
+ * The chain is gathered by following GetPrevHypo(), so the edge list starts
+ * with hypo and ends with the earliest hypothesis.
+ */
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
+{
+  if (!collector) {
+    return;
+  }
+  std::vector<const Hypothesis *> edges;
+  for (const Hypothesis *cursor = hypo; cursor; cursor = cursor->GetPrevHypo()) {
+    edges.push_back(cursor);
+  }
+  OutputAlignment(collector,lineNo, edges);
+}
+/**
+ * Write the alignment of a trellis path's edges to the collector.
+ * Does nothing when collector is NULL.
+ */
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
+{
+  if (!collector) {
+    return;
+  }
+  OutputAlignment(collector,lineNo, path.GetEdges());
+}
+/**
+ * Print the surface of a trellis path on one line.
+ *
+ * The path's edges are visited from the last element to the first and
+ * printed with the output factor order taken from StaticData. The
+ * translation id parameter is unused.
+ */
+void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out)
+{
+  typedef std::vector<const Hypothesis *>::const_reverse_iterator EdgeIter;
+  const std::vector<const Hypothesis *> &edges = path.GetEdges();
+  for (EdgeIter it = edges.rbegin(); it != edges.rend(); ++it) {
+    OutputSurface(out, **it, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
+  }
+  out << endl;
+}
+/**
+ * Trace the hypothesis chain at verbosity level 3.
+ *
+ * Starting at hypo, emits "<id> <= " for every hypothesis that has a
+ * predecessor; the first hypothesis of the chain (no predecessor) is not
+ * printed. hypo must not be NULL.
+ */
+void IOWrapper::Backtrack(const Hypothesis *hypo)
+{
+  for (const Hypothesis *cursor = hypo; cursor->GetPrevHypo() != NULL; cursor = cursor->GetPrevHypo()) {
+    VERBOSE(3,cursor->GetId() << " <= ");
+  }
+}
+/**
+ * Print an MBR-selected word sequence on one line.
+ *
+ * Only the first factor of the output factor order is printed for each
+ * word; words are separated by single spaces. The translation id and the
+ * two reporting flags are unused.
+ */
+void OutputBestHypo(const std::vector<Word>&  mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
+{
+  size_t index = 0;
+  for (std::vector<Word>::const_iterator word = mbrBestHypo.begin(); word != mbrBestHypo.end(); ++word, ++index) {
+    const Factor *factor = word->GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
+    CHECK(factor);
+    if (index != 0) {
+      out << " ";
+    }
+    out << *factor;
+  }
+  out << endl;
+}
+/**
+ * Record, for every hypothesis in the chain that has a predecessor, its
+ * source phrase in map at the index of the source range's start position.
+ *
+ * Predecessors are handled first (recursively). The first hypothesis of
+ * the chain, which has no predecessor, is not recorded. map must already
+ * be large enough for every start position that is written.
+ */
+void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
+{
+  const Hypothesis *previous = hypo->GetPrevHypo();
+  if (!previous) {
+    return;
+  }
+  OutputInput(map, previous);
+  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
+  map[startPos] = hypo->GetSourcePhrase();
+}
+/**
+ * Print the source phrases used by a hypothesis chain, ordered by the
+ * start position of their source range.
+ *
+ * A table with one slot per input word is filled by the two-argument
+ * OutputInput overload; every non-empty slot is then streamed to os.
+ */
+void OutputInput(std::ostream& os, const Hypothesis* hypo)
+{
+  typedef std::vector<const Phrase*> PhraseTable;
+  const size_t inputLength = hypo->GetInput().GetSize();
+  PhraseTable byStartPos(inputLength, static_cast<const Phrase*>(NULL));
+  OutputInput(byStartPos, hypo);
+  for (PhraseTable::const_iterator slot = byStartPos.begin(); slot != byStartPos.end(); ++slot) {
+    if (*slot) {
+      os << **slot;
+    }
+  }
+}
+/**
+ * Print the best translation to std::cout and trace it via VERBOSE.
+ *
+ * Nothing is written to std::cout while single-best output is suppressed.
+ * A NULL hypo yields an empty line. With path recovery enabled, the source
+ * phrases followed by "||| " precede the translation. The translation id
+ * parameter is unused.
+ */
+void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
+{
+  const bool printSingleBest = !m_surpressSingleBestOutput;
+
+  if (hypo == NULL) {
+    VERBOSE(1, "NO BEST TRANSLATION" << endl);
+    if (printSingleBest) {
+      cout << endl;
+    }
+    return;
+  }
+
+  VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
+  VERBOSE(3,"Best path: ");
+  Backtrack(hypo);
+  VERBOSE(3,"0" << std::endl);
+
+  if (!printSingleBest) {
+    return;
+  }
+  if (StaticData::Instance().IsPathRecoveryEnabled()) {
+    OutputInput(cout, hypo);
+    cout << "||| ";
+  }
+  OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
+  cout << endl;
+}
+/**
+ * Write an n-best list, one line per trellis path, and flush the stream.
+ *
+ * Each line has the form
+ *   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
+ * optionally followed by " |||" plus phrase alignments, " ||| " plus word
+ * alignments, and "|||" plus the recovered input, depending on StaticData.
+ *
+ * @param out                destination stream
+ * @param nBestList          paths to print, in list order
+ * @param outputFactorOrder  factors passed to OutputSurface
+ * @param system             supplies the feature functions, phrase
+ *                           dictionaries and generation dictionaries whose
+ *                           scores are printed; dereferenced unconditionally
+ * @param translationId      id printed at the start of every line
+ * @param reportSegmentation forwarded to OutputSurface
+ */
+void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
+{
+  const StaticData &staticData = StaticData::Instance();
+  bool labeledOutput = staticData.IsLabeledNBestList();
+  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
+  bool includeAlignment = staticData.NBestIncludesAlignment();
+  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+  TrellisPathList::const_iterator iter;
+  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+    const TrellisPath &path = **iter;
+    const std::vector<const Hypothesis *> &edges = path.GetEdges();
+    // print the surface factor of the translation
+    // (edges are walked from the last element to the first)
+    out << translationId << " ||| ";
+    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+      const Hypothesis &edge = *edges[currEdge];
+      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
+    }
+    out << " |||";
+    // stateful feature functions; with labeled output a "<name>:" label is
+    // printed whenever the short name differs from the previous one
+    std::string lastName = "";
+    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
+    for( size_t i=0; i<sff.size(); i++ ) {
+      if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() ) {
+        lastName = sff[i]->GetScoreProducerWeightShortName();
+        out << " " << lastName << ":";
+      }
+      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
+      for (size_t j = 0; j<scores.size(); ++j) {
+        out << " " << scores[j];
+      }
+    }
+    // stateless feature functions, labeled the same way (lastName carries over)
+    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
+    for( size_t i=0; i<slf.size(); i++ ) {
+      if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() ) {
+        lastName = slf[i]->GetScoreProducerWeightShortName();
+        out << " " << lastName << ":";
+      }
+      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
+      for (size_t j = 0; j<scores.size(); ++j) {
+        out << " " << scores[j];
+      }
+    }
+    // translation components
+    // labels are emitted for the first dictionary only, before score 0 and
+    // before the score at index GetNumInputScores()
+    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
+    if (pds.size() > 0) {
+      for( size_t i=0; i<pds.size(); i++ ) {
+	size_t pd_numinputscore = pds[i]->GetNumInputScores();
+	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
+	for (size_t j = 0; j<scores.size(); ++j){
+	  if (labeledOutput && (i == 0) ){
+	    if ((j == 0) || (j == pd_numinputscore)){
+	      lastName =  pds[i]->GetScoreProducerWeightShortName(j);
+	      out << " " << lastName << ":";
+	    }
+	  }
+	  out << " " << scores[j];
+	}
+      }
+    }
+    // generation
+    // same labeling rule as for the phrase dictionaries above
+    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
+    if (gds.size() > 0) {
+      for( size_t i=0; i<gds.size(); i++ ) {
+	size_t pd_numinputscore = gds[i]->GetNumInputScores();
+	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
+	for (size_t j = 0; j<scores.size(); ++j){
+	  if (labeledOutput && (i == 0) ){
+	    if ((j == 0) || (j == pd_numinputscore)){
+	      lastName =  gds[i]->GetScoreProducerWeightShortName(j);
+	      out << " " << lastName << ":";
+	    }
+	  }
+	  out << " " << scores[j];
+	}
+      }
+    }
+    // total
+    out << " ||| " << path.GetTotalScore();
+    //phrase-to-phrase alignment
+    // printed as " src[-srcEnd]=tgt[-tgtEnd]"; the loop starts at size()-2,
+    // i.e. the last element of edges is skipped
+    // NOTE(review): presumably that element is the empty initial hypothesis - confirm
+    if (includeAlignment) {
+      out << " |||";
+      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+        const Hypothesis &edge = *edges[currEdge];
+        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+        WordsRange targetRange = path.GetTargetWordsRange(edge);
+        out << " " << sourceRange.GetStartPos();
+        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+          out << "-" << sourceRange.GetEndPos();
+        }
+        out<< "=" << targetRange.GetStartPos();
+        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+          out<< "-" << targetRange.GetEndPos();
+        }
+      }
+    }
+    // word alignment: each edge's alignment points, shifted by the start of
+    // its source range and of its target range (last element skipped as above)
+    if (includeWordAlignment) {
+      out << " ||| ";
+      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+        const Hypothesis &edge = *edges[currEdge];
+        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+        WordsRange targetRange = path.GetTargetWordsRange(edge);
+        const int sourceOffset = sourceRange.GetStartPos();
+        const int targetOffset = targetRange.GetStartPos();
+        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
+        OutputAlignment(out, ai, sourceOffset, targetOffset);
+      }
+    }
+    // path recovery: append the source phrases reconstructed from edges[0]
+    // (edges[0] is accessed without checking that edges is non-empty)
+    if (StaticData::Instance().IsPathRecoveryEnabled()) {
+      out << "|||";
+      OutputInput(out, edges[0]);
+    }
+    out << endl;
+  }
+  out <<std::flush;
+}
+/**
+ * Write the lattice-MBR n-best list, one line per solution.
+ *
+ * Each line has the form
+ *   <translationId> |||<words> ||| map: <map score> w: <word count> <n-gram scores> ||| <score>
+ * where only the first factor of the output factor order is printed for
+ * each word. Note that the first word directly follows "|||".
+ *
+ * @param out            destination stream
+ * @param solutions      solutions to print, in container order
+ * @param translationId  id printed at the start of every line
+ */
+void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId)
+{
+  const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+  for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
+    out << translationId;
+    out << " |||";
+    // bind by reference: no need to copy the word vector for every solution
+    const vector<Word> &mbrHypo = si->GetWords();
+    for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
+      const Factor *factor = mbrHypo[i].GetFactor(outputFactorOrder[0]);
+      // same guard as OutputBestHypo: never stream through a null factor
+      CHECK(factor);
+      if (i>0) out << " " << *factor;
+      else     out << *factor;
+    }
+    out << " |||";
+    out << " map: " << si->GetMapScore();
+    out << " w: " << mbrHypo.size();
+    const vector<float>& ngramScores = si->GetNgramScores();
+    for (size_t i = 0; i < ngramScores.size(); ++i) {
+      out << " " << ngramScores[i];
+    }
+    out << " ||| " << si->GetScore();
+    out << endl;
+  }
+}
+/**
+ * Write the lattice-MBR n-best list to the n-best stream.
+ *
+ * m_nBestStream is dereferenced without a NULL test, so an n-best stream
+ * must have been set up by Initialization().
+ */
+void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId)
+{
+  std::ostream &nBestOut = *m_nBestStream;
+  OutputLatticeMBRNBest(nBestOut, solutions, translationId);
+}
+/**
+ * Replace *source with the next input read through ioWrapper.
+ *
+ * The previous object behind source is deleted first. A new object of the
+ * class matching inputType is then read via IOWrapper::GetInput(), which
+ * returns NULL (and frees the new object) when reading fails.
+ *
+ * @param ioWrapper  wrapper to read from
+ * @param inputType  selects Sentence, ConfusionNet or WordLattice
+ * @param source     in: previous input (may be NULL); out: the new input,
+ *                   or NULL on failure or for an unknown input type
+ * @return true when source now points at a freshly read input
+ */
+bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
+{
+  delete source;
+  // Reset right away: otherwise an unknown input type would leave the caller
+  // with a dangling pointer and make this function report success.
+  source = NULL;
+  switch(inputType) {
+  case SentenceInput:
+    source = ioWrapper.GetInput(new Sentence);
+    break;
+  case ConfusionNetworkInput:
+    source = ioWrapper.GetInput(new ConfusionNet);
+    break;
+  case WordLatticeInput:
+    source = ioWrapper.GetInput(new WordLattice);
+    break;
+  default:
+    TRACE_ERR("Unknown input type: " << inputType << "\n");
+  }
+  return (source != NULL);
+}
+/**
+ * Create the IOWrapper described by the configuration.
+ *
+ * When exactly one "input-file" parameter is present, input is read from
+ * that file; otherwise from standard input. The translation id counter of
+ * the new wrapper is reset before it is returned.
+ *
+ * @return a heap-allocated IOWrapper; nothing in this function frees it
+ */
+IOWrapper *GetIOWrapper(const StaticData &staticData)
+{
+  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
+  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+  FactorMask inputFactorUsed(inputFactorOrder);
+
+  // io
+  IOWrapper *ioWrapper = NULL;
+  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
+  if (!readFromFile) {
+    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
+    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed,
+                              staticData.GetNBestSize(),
+                              staticData.GetNBestFilePath());
+  } else {
+    VERBOSE(2,"IO from File" << endl);
+    string filePath = staticData.GetParam("input-file")[0];
+    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed,
+                              staticData.GetNBestSize(),
+                              staticData.GetNBestFilePath(),
+                              filePath);
+  }
+  ioWrapper->ResetTranslationId();
+
+  IFVERBOSE(1)
+  PrintUserTime("Created input-output object");
+  return ioWrapper;
+}
+}

mosesdecoder/contrib/relent-filter/src/Jamfile ADDED Viewed

	@@ -0,0 +1,6 @@

+alias deps : ../../../moses/src//moses ;
+exe calcDivergence : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp RelativeEntropyCalc.cpp deps ;
+alias programs : calcDivergence ;

mosesdecoder/contrib/relent-filter/src/Main.h ADDED Viewed

	@@ -0,0 +1,39 @@

+/*********************************************************************
+Relative Entropy-based Phrase table Pruning
+Copyright (C) 2012 Wang Ling
+All rights reserved.
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+                        this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+                        this list of conditions and the following disclaimer in the documentation
+                        and/or other materials provided with the distribution.
+    * Neither the name of the University of Edinburgh nor the names of its contributors
+                        may be used to endorse or promote products derived from this software
+                        without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+// Include guard for the command-line driver's main header.
+#ifndef moses_cmd_Main_h
+#define moses_cmd_Main_h
+#include "StaticData.h"
+// Forward declaration at global scope; nothing else in this header uses it.
+// NOTE(review): IOWrapper.cpp defines IOWrapper inside namespace MosesCmd, so
+// this presumably declares a different, unused ::IOWrapper - confirm.
+class IOWrapper;
+// Declaration of the program entry point (standard argc/argv form).
+int main(int argc, char* argv[]);
+#endif

mosesdecoder/contrib/relent-filter/src/TranslationAnalysis.cpp ADDED Viewed

	@@ -0,0 +1,126 @@

+// $Id$
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include "StaticData.h"
+#include "Hypothesis.h"
+#include "TranslationAnalysis.h"
+using namespace Moses;
+namespace TranslationAnalysis
+{
+/**
+ * Write a human-readable analysis of one translation hypothesis to os.
+ *
+ * The report lists, for every step after the initial state, the source
+ * span, its translation and word alignment; then the source/target span
+ * maps, the average n-gram length per language model (when LM statistics
+ * are available), the dropped source phrases, and the labeled scores of
+ * the final hypothesis.
+ *
+ * @param system  supplies the language models named in the LM statistics
+ * @param os      destination stream
+ * @param hypo    last hypothesis of the translation; its GetPrevHypo()
+ *                chain is followed back to the start. With a NULL hypo only
+ *                the heading is printed.
+ */
+void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
+{
+  os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
+  // collect the back-pointer chain, then flip it into first-to-last order
+  std::vector<const Hypothesis*> translationPath;
+  while (hypo) {
+    translationPath.push_back(hypo);
+    hypo = hypo->GetPrevHypo();
+  }
+  std::reverse(translationPath.begin(), translationPath.end());
+  std::vector<std::string> droppedWords;
+  std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
+  if(tpi == translationPath.end())
+    return;
+  ++tpi;  // skip initial translation state
+  std::vector<std::string> sourceMap;
+  std::vector<std::string> targetMap;
+  std::vector<unsigned int> lmAcc(0);
+  size_t lmCalls = 0;
+  // The path may consist of the initial state only, in which case tpi is
+  // already at end() and must not be dereferenced.
+  bool doLMStats = (tpi != translationPath.end()) && ((*tpi)->GetLMStats() != 0);
+  if (doLMStats)
+    lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
+  for (; tpi != translationPath.end(); ++tpi) {
+    std::ostringstream sms;
+    std::ostringstream tms;
+    std::string target = (*tpi)->GetTargetPhraseStringRep();
+    std::string source = (*tpi)->GetSourcePhraseStringRep();
+    WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
+    WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
+    const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
+    // language model backoff stats,
+    if (doLMStats) {
+      std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
+      std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
+      std::vector<unsigned int>::iterator acc = lmAcc.begin();
+      // lmAcc was sized from the first step; stop at its end so a longer
+      // stats vector on a later step cannot write out of bounds
+      for (; i != lmstats.end() && acc != lmAcc.end(); ++i, ++acc) {
+        std::vector<unsigned int>::iterator j = i->begin();
+        lmCalls += i->size();
+        for (; j != i->end(); ++j) {
+          (*acc) += *j;
+        }
+      }
+    }
+    // an empty target phrase means the source phrase was dropped
+    bool epsilon = false;
+    if (target == "") {
+      target="<EPSILON>";
+      epsilon = true;
+      droppedWords.push_back(source);
+    }
+    os << "         SOURCE: " << swr << " " << source << std::endl
+       << "  TRANSLATED AS: "               << target << std::endl
+       << "  WORD ALIGNED: " << alignmentInfo << std::endl;
+    // build "a-b-c" position lists; note the naming: sms holds the target
+    // positions and tms the source positions
+    size_t twr_i = twr.GetStartPos();
+    size_t swr_i = swr.GetStartPos();
+    if (!epsilon) {
+      sms << twr_i;
+    }
+    if (epsilon) {
+      tms << "del(" << swr_i << ")";
+    } else {
+      tms << swr_i;
+    }
+    swr_i++;
+    twr_i++;
+    for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
+      sms << '-' << twr_i;
+    }
+    for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
+      tms << '-' << swr_i;
+    }
+    if (!epsilon) targetMap.push_back(sms.str());
+    sourceMap.push_back(tms.str());
+  }
+  std::vector<std::string>::iterator si = sourceMap.begin();
+  std::vector<std::string>::iterator ti = targetMap.begin();
+  os << std::endl << "SOURCE/TARGET SPANS:";
+  os << std::endl << "  SOURCE:";
+  for (; si != sourceMap.end(); ++si) {
+    os << " " << *si;
+  }
+  os << std::endl << "  TARGET:";
+  for (; ti != targetMap.end(); ++ti) {
+    os << " " << *ti;
+  }
+  os << std::endl << std::endl;
+  if (doLMStats && lmCalls > 0) {
+    std::vector<unsigned int>::iterator acc = lmAcc.begin();
+    const LMList& lmlist = system->GetLanguageModels();
+    LMList::const_iterator i = lmlist.begin();
+    // walk both sequences in step and stop at whichever ends first
+    for (; acc != lmAcc.end() && i != lmlist.end(); ++acc, ++i) {
+      char buf[256];
+      sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
+      os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
+    }
+  }
+  if (droppedWords.size() > 0) {
+    std::vector<std::string>::iterator dwi = droppedWords.begin();
+    os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
+    for (; dwi != droppedWords.end(); ++dwi) {
+      os << "\tdropped=" << *dwi << std::endl;
+    }
+  }
+  os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
+  StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
+  os << std::endl;
+}
+}

mosesdecoder/contrib/relent-filter/src/mbr.h ADDED Viewed

	@@ -0,0 +1,28 @@

+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+// Declarations for the MBR helpers.
+// NOTE(review): this header includes nothing itself, so std::vector, std::map
+// and the Moses types named below must already be declared by the includer.
+#ifndef moses_cmd_mbr_h
+#define moses_cmd_mbr_h
+// Takes an n-best list and returns a TrellisPath by value.
+// NOTE(review): presumably the minimum-Bayes-risk choice among nBestList - confirm in mbr.cpp.
+const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList);
+// Fills `translation` (an output parameter) with factors taken from `path`.
+// NOTE(review): presumably one output factor per target word - confirm in mbr.cpp.
+void GetOutputFactors(const Moses::TrellisPath &path, std::vector <const Moses::Factor*> &translation);
+// Scores sentence `hyp` against sentence `ref`, both indices into `sents`;
+// `ngram_stats` is passed by non-const reference.
+// NOTE(review): presumably a per-sentence n-gram count cache and a BLEU-style score - confirm in mbr.cpp.
+float calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, int ref, int hyp,  std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats );
+#endif

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h ADDED Viewed

	@@ -0,0 +1,180 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_ARCHETYPESET_
+#define _NL_ARCHETYPESET_
+#include <boost/thread/thread.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/bind.hpp>
+#include "nl-minheap.h"
+////////////////////////////////////////////////////////////////////////////////
+// Scored<S,T>: a T decorated with a score of type S.
+// The class derives publicly from T, so a Scored value can be used wherever
+// a T is expected; the score lives in the public member scr.
+template <class S, class T>
+class Scored : public T {
+ public:
+  S scr;
+  // Constructors are named with the injected class name: the former
+  // `Scored<S,T>(...)` spelling is rejected by compilers from C++20 on.
+  Scored ( )                 : T() , scr()  { }   // value-initialized T and score
+  Scored ( S s )             : T() , scr(s) { }   // value-initialized T, given score
+  // Takes t by const reference (it is only copied), so const objects and
+  // temporaries are accepted as well as the non-const lvalues taken before.
+  Scored ( S s, const T& t ) : T(t), scr(s) { }
+  // Returns a mutable reference to the score, e.g. x.setScore() = s;
+  S& setScore()       { return scr; }
+  // Returns a copy of the score.
+  S  getScore() const { return scr; }
+};
+////////////////////////////////////////////////////////////////////////////////
+// Numbered<SD1,I,SD2,T,SD3>: a T tagged with a number of type I.
+// The three char* template arguments are delimiter strings: a value is
+// written as SD1, the number, SD2, the T part, SD3 (see operator<< below)
+// and read back in the same order.
+template<char* SD1,class I,char* SD2,class T,char* SD3>
+class Numbered : public T {
+ private:
+  // Data members...
+  I i;   // the number attached to the T base object
+ public:
+  // Constructor / destructor methods...
+  // Default constructor: i gets no explicit initializer.
+  Numbered<SD1,I,SD2,T,SD3> ( )                                         { }
+  // Parses the object from the string ps with the operator>> pair below.
+  // NOTE(review): relies on a conversion from char* to StringInput - confirm.
+  Numbered<SD1,I,SD2,T,SD3> ( char* ps )                                { ps>>*this>>"\0"; }
+  Numbered<SD1,I,SD2,T,SD3> ( const I& iA, const T& tA ) : T(tA), i(iA) { }
+  // Specification methods...
+  // Both setters return mutable references for assignment by the caller.
+  I&       setNumber ( )       { return i; }
+  T&       setT      ( )       { return *this; }
+  // Extraction methods...
+  const I& getNumber ( ) const { return i; }
+  const T& getT      ( ) const { return *this; }
+  // Input / output methods...
+  // Output: SD1, number, SD2, T part, SD3 - to an ostream or a String.
+  friend ostream& operator<< ( ostream& os, const Numbered<SD1,I,SD2,T,SD3>& rv ) { return  os<<SD1<<rv.i<<SD2<<rv.getT()<<SD3; }
+  friend String&  operator<< ( String& str, const Numbered<SD1,I,SD2,T,SD3>& rv ) { return str<<SD1<<rv.i<<SD2<<rv.getT()<<SD3; }
+  // Input is a two-step protocol: `input >> obj` only pairs the input with a
+  // pointer to the target; the following `>> "delimiter"` does the reading.
+  friend pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> operator>> ( StringInput ps, Numbered<SD1,I,SD2,T,SD3>& rv ) { return pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*>(ps,&rv); }
+  // Reads SD1, the number, SD2 and the T part; when SD3 is the empty string
+  // it is not read, and the T part is followed directly by psPostDelim.
+  friend StringInput    operator>> ( pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> delimbuff, const char* psPostDelim ) {
+    return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim
+                            : delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>SD3>>psPostDelim );
+  }
+};
+////////////////////////////////////////////////////////////////////////////////
+// ArchetypeSet<V>: a multimap from V::ElementType keys to numbered copies of
+// V (Numbered<psX,int,psBar,V,psX>). add() keys each vector by its element
+// at FIRST_INDEX_TO_CHECK; getDistanceOfNearest() searches around that key.
+// psX and psBar are delimiter strings defined outside this header.
+template<class V>
+class ArchetypeSet : public multimap<typename V::ElementType,Numbered<psX,int,psBar,V,psX> > {
+ private:
+  // Static data members...
+  static const int FIRST_INDEX_TO_CHECK = 0;   // element index used as the map key
+  typedef Numbered<psX,int,psBar,V,psX> NV;
+  typedef multimap<typename V::ElementType,NV> MapType;
+  // Data members...
+  // Scratch heap used by getDistanceOfNearest(), which casts away const to
+  // clear and refill it on every call.
+  MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > > hsivCalc;
+ public:
+  ArchetypeSet<V> ( ) { }
+  // Copying is allowed but announced on cerr. Only the map base is copied;
+  // the copy constructor leaves hsivCalc default-constructed and the
+  // assignment operator leaves it untouched.
+  ArchetypeSet<V> ( const ArchetypeSet<V>& aa ) : MapType(aa) { cerr<<"\nCOPY!!!!\n\n"; }
+  ArchetypeSet<V>& operator= ( const ArchetypeSet<V>& aa ) { cerr<<"\nCOPY2!!!!\n\n"; MapType::operator=(aa); return *this; }
+  // Specification methods...
+  void add ( const V& );
+  // Extraction methods...
+  bool isEmpty ( ) const { return MapType::empty(); }
+  pair<typename V::ElementType,int> getDistanceOfNearest ( const V& ) const;
+  // Writes every stored numbered vector to cerr, one per line, in map order.
+  void dump ( ) { for(typename MapType::const_iterator ii=MapType::begin(); ii!=MapType::end(); ii++) cerr<<ii->second<<"\n"; }
+};
+////////////////////
+// Adds a copy of v to the set. The copy is keyed by v's element at
+// FIRST_INDEX_TO_CHECK and numbered with the set's size after insertion
+// (current size plus one), so numbering starts at 1.
+template<class V>
+void ArchetypeSet<V>::add ( const V& v ) {
+  typedef pair<typename V::ElementType,NV> Entry;
+  MapType::insert ( Entry ( v.get(FIRST_INDEX_TO_CHECK), NV ( MapType::size()+1, v ) ) );
+}
+////////////////////
+// Searches the stored archetypes for query vector v and returns the pair
+// (accumulated score, number) of the heap-minimum candidate once that candidate has had every
+// coordinate up to index V::SIZE-1 folded into its score.
+// Candidates are taken from the map working outward from v's first coordinate: upward through
+// iUpper and downward through iLower.  Each heap entry carries the index of the last coordinate
+// already included in its score (.first) and a pointer to the candidate (.second); its score is
+// the running sum of v.getMarginalDistance() over the coordinates included so far.
+// NOTE(review): when the map is empty, iLower equals end() and is dereferenced below --
+// presumably callers check isEmpty() first; confirm.
+template<class V>
+pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const V& v ) const {
+  //const Scored<typename V::ElementType,pair<int,SafePtr<const V> > > sipvDummy ( DBL_MAX );
+  //MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const V> > > > hsiv ( MapType::size()+1, sipvDummy );
+  // Reuse the member heap as scratch space; constness is cast away because this method is const.
+  MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >& hsiv =
+    const_cast<MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >&> ( hsivCalc );
+  hsiv.clear();
+  // Bracket the query on its first coordinate: iUpper is the first entry with a greater key,
+  // iLower is the entry just before it (or stays equal to iUpper when iUpper is begin()).
+  // NOTE(review): in that last case the same entry appears to be seeded into the heap twice.
+  typename MapType::const_iterator iUpper = MapType::upper_bound(v.get(FIRST_INDEX_TO_CHECK));
+  typename MapType::const_iterator iLower = iUpper; if(iLower!=MapType::begin())iLower--;
+  ////cerr<<"seeking "<<v.get(0)<<" (upper=ln"<<(&iUpper->second)<<" "<<((iUpper!=MapType::end())?iUpper->first:-1)<<", lower=ln"<<&iLower->second<<" "<<iLower->first<<")\n";
+  // iNext is the next free heap slot.
+  int iNext = 0;
+  // Seed the heap with the upper neighbour, if there is one, scored on the first coordinate only...
+  if ( iUpper!=MapType::end() ) {
+    hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
+    hsiv.set(iNext).second = SafePtr<const NV> ( iUpper->second );
+    typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
+    hsiv.set(iNext).setScore() = d;
+    //hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iUpper->second.second );
+    ////int j =
+    hsiv.fixDecr(iNext);   // presumably restores heap order for the newly filled slot -- confirm in nl-heap.h
+    ////cerr<<"    adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
+    iNext++;
+    ////for(int i=0;i<iNext;i++) cerr<<"      "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
+  }
+  // Seed the heap with the lower neighbour (done unconditionally), scored the same way...
+  hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
+  hsiv.set(iNext).second = SafePtr<const NV> ( iLower->second );
+  typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
+  hsiv.set(iNext).setScore() = d;
+  //hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iLower->second.second );
+  ////int j =
+  hsiv.fixDecr(iNext);
+  ////cerr<<"    adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
+  iNext++;
+  ////for(int i=0;i<iNext;i++) cerr<<"      "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
+  // Keep refining until the best-scoring entry has had all coordinates included...
+  while ( hsiv.getMin().first < V::SIZE-1 ) {
+    // Advance the minimum entry to its next coordinate and add that coordinate's marginal distance...
+    typename V::ElementType d = v.getMarginalDistance ( ++hsiv.setMin().first, hsiv.getMin().second.getRef() );
+    hsiv.setMin().setScore() += d;
+    ////cerr<<" matching ln"<<&hsiv.getMin().second.getRef()<<" i="<<hsiv.setMin().first<<" marg-dist="<<d<<" new-score="<<hsiv.getMin().getScore();
+    ////int j =
+    hsiv.fixIncr(0);   // the top entry's score grew; presumably re-sinks it to restore heap order
+    ////cerr<<" new-pos="<<j<<"\n";
+    ////if(j!=0) for(int i=0;i<iNext;i++) cerr<<"      "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
+    // If the entry now at the top is the upper frontier, extend the frontier upward by one map entry...
+    if ( iUpper!=MapType::end() && &hsiv.getMin().second.getRef() == &iUpper->second ) {
+      iUpper++;
+      if ( iUpper!=MapType::end() ) {
+        hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
+        hsiv.set(iNext).second = SafePtr<const NV> ( iUpper->second );
+        typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
+        hsiv.set(iNext).setScore() = d;
+        ////int j =
+        hsiv.fixDecr(iNext);
+        ////cerr<<"    adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
+        iNext++;
+        ////for(int i=0;i<iNext;i++) cerr<<"      "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
+      }
+    }
+    // If the entry now at the top is the lower frontier, extend the frontier downward by one map entry...
+    if ( iLower!=MapType::begin() && &hsiv.getMin().second.getRef() == &iLower->second ) {
+      iLower--;
+      hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
+      hsiv.set(iNext).second = SafePtr<const NV> ( iLower->second );
+      typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
+      hsiv.set(iNext).setScore() = d;
+      ////int j =
+      hsiv.fixDecr(iNext);
+      ////cerr<<"    adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
+      iNext++;
+      ////for(int i=0;i<iNext;i++) cerr<<"      "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
+    }
+  }
+  // Report the winning entry's accumulated score together with the number it was given in add().
+  return pair<typename V::ElementType,int> ( hsiv.getMin().getScore(), hsiv.getMin().second.getRef().getNumber() );
+}
+////////////////////////////////////////////////////////////////////////////////
+#endif //_NL_ARCHITYPESET_

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-beam.h ADDED Viewed

	@@ -0,0 +1,164 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_BEAM__
+#define _NL_BEAM__
+#include "nl-heap.h"
+#include "nl-hash.h"
+//#include <boost/thread/thread.hpp>
+//#include <boost/thread/mutex.hpp>
+#include <tr1/unordered_map>
+////////////////////////////////////////////////////////////////////////////////
+/*
+template <class R>
+class SafePtr {
+ private:
+  R* pr;
+  static R rDummy;
+ public:
+  SafePtr<R> ( )      : pr(NULL) { }
+  SafePtr<R> ( R& r ) : pr(&r)   { }
+  bool operator== ( const SafePtr<R>& spr ) const { return(pr==spr.pr); }
+  bool operator!= ( const SafePtr<R>& spr ) const { return(!(pr==spr.pr)); }
+  R&       set ( )       { assert(pr); return (pr!=NULL) ? *pr : rDummy; }
+  const R& get ( ) const { return (pr!=NULL) ? *pr : rDummy; }
+};
+template <class R>
+R SafePtr<R>::rDummy = R();
+template <class S, class R>
+class ScoredPtr : public SafePtr<R> {
+ public:
+  S scr;
+  ScoredPtr<S,R> ( )           : SafePtr<R>() , scr()  { }
+  ScoredPtr<S,R> ( S s, R& r ) : SafePtr<R>(r), scr(s) { }
+  S& setScore()       { return scr; }
+  S  getScore() const { return scr; }
+};
+*/
+////////////////////////////////////////////////////////////////////////////////
+// An iterator into container type C that additionally carries a score of type S.
+// It derives from C::iterator, so it can be dereferenced like the underlying iterator.
+template <class S, class C>
+class ScoredIter : public C::iterator {
+ private:
+  //static C cDummy;
+  S s;   // score attached to the element the iterator refers to
+ public:
+  // Constructors are named with the injected class name; the template-id spelling
+  // (ScoredIter<S,C>) is not accepted for constructors from C++20 onwards.
+  // NOTE(review): the default constructor builds the base iterator from (0,0), which requires
+  // C::iterator to offer a two-argument constructor accepting those values -- confirm for each C used.
+  ScoredIter ( )                                      : C::iterator(0,0), s()   { }
+  // Wraps iterator i1 and attaches score s1 to it.
+  ScoredIter ( S s1, const typename C::iterator& i1 ) : C::iterator(i1),  s(s1) { }
+  //ScoredIter<S,C> ( )                                      : C::iterator(cDummy.end()), s()   { }
+  // Mutable access to the score.
+  S& setScore()       { return s; }
+  // Copy of the score.
+  S  getScore() const { return s; }
+};
+//template <class S, class C> C ScoredIter<S,C>::cDummy;
+////////////////////////////////////////////////////////////////////////////////
+// Fixed-width beam of (key K, data D) entries ranked by score S.
+// Entries live in a hash multimap (mkid) from key to (heap position, data).  A min-heap (hspkid)
+// of score-carrying iterators into that map keeps the lowest-scoring entry on top, which is the
+// entry tryAdd() replaces when a better-scoring candidate with a new key arrives.
+template <class S,class K,class D>
+class Beam {
+ public:
+  typedef std::pair<int,D> ID;                 // (heap position, data)
+  typedef std::pair<K,std::pair<int,D> > KID;  // (key, (heap position, data))
+  typedef std::tr1::unordered_multimap<K,ID,SimpleHashFn<K>,SimpleHashEqual<K> > BeamMap;
+  typedef MinHeap<ScoredIter<S,BeamMap> >                                        BeamHeap;
+ private:
+  BeamMap  mkid;     // key -> (heap position, data)
+  BeamHeap hspkid;   // min-heap of scored iterators into mkid
+ public:
+  // Constructor methods...
+  // Builds a beam of width i: the map is constructed with argument 2*i, the heap with i, and
+  // every heap slot is filled with a placeholder entry (default key, data and score).
+  // The constructor is named with the injected class name; the template-id spelling
+  // (Beam<S,K,D>) is not accepted for constructors from C++20 onwards.
+  Beam ( int i ) : mkid(2*i), hspkid(i) { for(int j=0;j<i;j++)set(j,K(),D(),S()); }
+  // Specification methods...
+  bool tryAdd (        const K&,   const D&,   const S&   ) ;
+  // Inserts (k,(i,d)) into the map and makes heap slot i refer to it with score s.
+  // Does not remove whatever slot i referred to before, and does not repair heap order.
+  void set    ( int i, const K& k, const D& d, const S& s ) { hspkid.set(i) = ScoredIter<S,BeamMap>(s,mkid.insert(KID(k,ID(i,d)))); }
+  // Extraction methods...
+  const ScoredIter<S,BeamMap>& getMin ( )       const { return hspkid.getMin(); }
+  const ScoredIter<S,BeamMap>& get    ( int i ) const { return hspkid.get(i);   }
+  void                         sort   ( SafeArray1D<Id<int>,std::pair<std::pair<K,D>,S> >& ) ;
+  // Writes one line per heap slot to pf: the slot's score as an integer, a space, then the key
+  // (via the key's own write method).
+  void write(FILE *pf){
+/*    for (typename BeamMap::const_iterator i = mkid.begin(); i != mkid.end(); i++){
+      i->first.write(pf);
+      fprintf(pf, " %d ", i->second.first);
+//      i->second.second.write(pf);
+      fprintf(pf, "\n");
+    }
+*/
+    for(int i=0; i<hspkid.getSize(); i++){
+      fprintf(pf, "%d ", hspkid.get(i).getScore().toInt());
+      hspkid.get(i)->first.write(pf);
+      fprintf(pf, "\n");
+    }
+  }
+};
+// Offers (key k, data d, score s) to the beam.
+// Nothing changes unless s beats the score at the heap minimum.  If find() locates an entry with
+// key k, that entry's score and data are overwritten only when s beats its current score;
+// otherwise the minimum entry is erased from the map and replaced by the new one in heap slot 0.
+// After either change the heap is repaired with fixIncr, and the heap positions stored in the
+// map entries (ID.first) are rewritten along the path between the start and final positions.
+// Returns whether the heap minimum's score differs from a default-constructed LogProb.
+// NOTE(review): the return statement hard-codes LogProb although the score type is the template
+// parameter S, and the constructor fills empty slots with S() -- confirm S is always LogProb.
+template <class S,class K,class D>
+bool Beam<S,K,D>::tryAdd ( const K& k, const D& d, const S& s ) {
+  // If score good enough to get into beam...
+  if ( s > hspkid.getMin().getScore() ) {
+    typename BeamMap::const_iterator i = mkid.find(k);
+    // If key in beam already...
+    if ( i != mkid.end() ) {
+      // If same key in beam now has better score...
+      if ( s > hspkid.get(i->second.first).getScore() ) {
+        // Update score (and data associated with that score)...
+        hspkid.set(i->second.first).setScore() = s;
+        hspkid.set(i->second.first)->second.second = d;
+        // Update heap...
+        // (the score only went up; fixIncr presumably returns the slot the entry ends up in -- confirm in nl-heap.h)
+        int iStart = i->second.first; int iDeeper = hspkid.fixIncr(iStart);
+        // Fix pointers in hash...
+        // (walks 1-based positions from the final slot up to the start slot by halving, rewriting each stored position)
+        for ( int j = iDeeper+1; j>=iStart+1; j/=2 ) hspkid.set(j-1)->second.first = j-1;
+      }
+    }
+    // If x not in beam already, add...
+    else {
+      // Remove min from map (via pointer in heap)...
+      mkid.erase ( hspkid.getMin() );
+      // Insert new entry at min...
+      set(0,k,d,s);
+      // Update heap...
+      int iStart = 0; int iDeeper = hspkid.fixIncr(iStart);
+      // Fix pointers in hash...
+      for ( int j = iDeeper+1; j>=iStart+1; j/=2 ) hspkid.set(j-1)->second.first = j-1;
+    }
+  }
+  return ( LogProb() != hspkid.getMin().getScore() );  // true = beam full, false = beam still has gaps
+}
+// Copies the beam contents into akdsOut, filling it from its last slot backwards with the
+// successive heap minima.  Destructive: each extracted entry's score is overwritten with
+// LogProb(1) to push it out of the way, so the heap no longer holds the original scores.
+template <class S,class K,class D>
+void Beam<S,K,D>::sort ( SafeArray1D<Id<int>,std::pair<std::pair<K,D>,S> >& akdsOut ) {
+  for ( int nDone=0; nDone<hspkid.getSize(); ++nDone ) {
+    // Output slot for this round: counts down from the last slot.
+    std::pair<std::pair<K,D>,S>& kdsOut = akdsOut.set(hspkid.getSize()-nDone-1);
+    const ScoredIter<S,BeamMap>& siMin  = hspkid.getMin();
+    kdsOut.first.first  = siMin->first;           // key of current minimum
+    kdsOut.first.second = siMin->second.second;   // data of current minimum
+    kdsOut.second       = siMin.getScore();       // score of current minimum
+    // Retire the minimum by overwriting its score, then let the heap re-order itself.
+    hspkid.setMin().setScore() = LogProb(1);
+    hspkid.fixIncr(0);
+  }
+}
+////////////////////////////////////////////////////////////////////////////////
+#endif //_NL_BEAM__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-const.h ADDED Viewed

	@@ -0,0 +1,125 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef __NL_CONST_H_
+#define __NL_CONST_H_
+//#include <getopt.h>
+///////////////////////////////////////////////////////////////////////////////
+// type defs...
+typedef short int16 ;
+typedef int16 Sample ;
+//typedef int   Mag ;
+//typedef int   Gam ;
+///////////////////////////////////////////////////////////////////////////////
+// Misc consts...
+// Integer max/min.  Declared inline so that this header can be included from several
+// translation units without producing duplicate-symbol link errors.
+inline int max(int i,int j) {return (i>j)?i:j;}
+inline int min(int i,int j) {return (i<j)?i:j;}
+// Bitwise rotation of n by i positions within the full width of size_t.
+// The width is measured in bits (8 per byte), not in bytes, and the shift count is reduced
+// modulo that width so that a count of zero (or a multiple of the width) returns n unchanged
+// rather than shifting by the full width, which is undefined behaviour.
+inline size_t rotLeft (const size_t& n, const size_t& i) {
+  const size_t numBits = 8*sizeof(size_t);
+  const size_t shift   = i % numBits;
+  return (0==shift) ? n : ( (n << shift) | (n >> (numBits - shift)) );
+}
+inline size_t rotRight(const size_t& n, const size_t& i) {
+  const size_t numBits = 8*sizeof(size_t);
+  const size_t shift   = i % numBits;
+  return (0==shift) ? n : ( (n >> shift) | (n << (numBits - shift)) );
+}
+//inline float abs ( float a ) { return (a>=0)?a:-a; }
+/*
+///////////////////////////////////////////////////////////////////////////////
+// Basic phone recognition consts...
+static const int NUM_SAMPLES_PER_FRAME = 512;
+#ifdef OLD_Q
+static const int FRAME_RATE_IN_SAMPLES = 160;  //// 80
+#else
+static const int FRAME_RATE_IN_SAMPLES = 256; //// 160;  //// 80
+#endif
+static const int LOG_NUM_FREQUENCIES = 8;
+static const int LOG_NUM_QUEFRENCIES = LOG_NUM_FREQUENCIES;
+static const int NUM_FREQUENCIES = 1<<LOG_NUM_FREQUENCIES;
+static const int NUM_QUEFRENCIES = 1<<LOG_NUM_QUEFRENCIES;
+///////////////////////////////////////////////////////////////////////////////
+// Output format globals
+static bool OUTPUT_QUIET = false;
+///////////////////////////////////////////////////////////////////////////////
+// H/O consts...
+static int LOG_MAX_SIGNS = 13;  // NOTE: bit limit: LOG_MAX_SIGNS + 3*LOG_MAX_ENTS < 31
+static int MAX_SIGNS     = 1<<LOG_MAX_SIGNS;
+static int MAX_IVS       = 100;
+///////////////////////////////////////////////////////////////////////////////
+// H sign recognition consts...
+static double INSERT_PENALTY   = 1.0;   // MULTIPLICATIVE
+static int    MAX_FANOUT       = 150;
+static const int MAX_BOOLS     = 2;
+static const int MAX_TRUTHVALS = 3;
+///////////////////////////////////////////////////////////////////////////////
+// H sem recognition consts...
+static int LOG_MAX_ENTS = 6;
+static int MAX_ENTS     = 1<<LOG_MAX_ENTS;
+static int MAX_CONTEXTS = 100;
+static int MAX_RELNS    = 100;
+static int MAX_CATS     = 1000;
+///////////////////////////////////////////////////////////////////////////////
+// Reader consts...
+static int MAX_READER_FIELDS    = 50; //62442; //20;
+static int LENGTH_READER_FIELDS = 1024; //512; //256;
+///////////////////////////////////////////////////////////////////////////////
+// HMM consts...
+//static const int BEAM_WIDTH = 4095;
+static int BEAM_WIDTH = 63; //255;
+//static const int BEAM_WIDTH = 1023;
+///////////////////////////////////////////////////////////////////////////////
+static const int NUM_MFCC_FILTERS = 40;
+static const int NUM_CEPSTRUM = 13;
+static const int WEIGHT_SIZE = 8;
+static const int MFCC_SIZE = 3 * NUM_CEPSTRUM;
+static const float MIN_FREQUENCY = 0; //130.0;
+static const float MAX_FREQUENCY = 8000.0; //Max allowed freq in signal is 16000Hz
+static const int MEAN_SIZE = (WEIGHT_SIZE * MFCC_SIZE);
+//Use a diagonal matrix for now
+//static const int COVARIANCE_SIZE = (MEAN_SIZE *  MFCC_SIZE);
+static const int COVARIANCE_SIZE = MEAN_SIZE;
+static const int MAX_NUM_FRAMES = 10000;
+static const float PREEMPASIZE_FACTOR = 0.97;
+static const int NUM_FFT_POINTS = NUM_SAMPLES_PER_FRAME;
+static const int SAMPLING_RATE = 16000;
+static const bool DEBUG_MODE = false;
+*/
+#endif /*__NL_CONST_H_*/

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-crf.h ADDED Viewed

	@@ -0,0 +1,359 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_CRF__
+#define _NL_CRF__
+#include "nl-safeids.h"
+#include "nl-probmodel.h"
+#include <cassert>
+#include <math.h>
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  CRF3DModeledRV<Y,X1,X2>
+//
+////////////////////////////////////////////////////////////////////////////////
+// Random variable of type Y whose probability given two conditioning variables (X1, X2) is
+// computed from a table of potentials.  The table and its dimensions are static, so they are
+// shared by every object of the same instantiation.
+// Usage order implied by the code: init() records the dimensions, the first setPotential() call
+// allocates the table from them, and getPotential()/getProb() may be used only after that.
+template <class Y,class X1,class X2>
+class CRF3DModeledRV : public Y {
+ private:
+  // Static data members...
+  static bool               bModel;      // whether model defined yet
+  static int                cardGlb;     // global dependencies (used in all potentials)
+  static int                cardOff;     // offset positions in site var sequence
+  static int                cardSh;      // clique shapes at each offset
+  static int                cardCnd;     // possible condition clique configs incl non-site vars in high bits
+  static int                bitsVal;     // size in bits of val part of clique config
+  static int                bitsValSite; // size in bits of each site var in val clique config
+  static SafeArray5D<Id<int>,int,int,int,int,float> aaaaaPotentials;  // the model
+/*   static SafeArray3D<int>   aaaCnds;          // calc features only once per frame */
+ public:
+  // Static extraction methods...
+  // Looks up one potential; asserts that the table has been allocated by setPotential().
+  static const float& getPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
+    { assert(bModel); return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
+  // Static specification methods...
+  // Records the table dimensions: g,o,s,c are the four cardinalities, v and b the two bit widths.
+  static void   init         ( int g, int o, int s, int c, int v, int b )
+    { cardGlb=g; cardOff=o; cardSh=s; cardCnd=c; bitsVal=v; bitsValSite=b; }
+  // Returns a writable reference to one potential.  On the first call the table is allocated with
+  // 1<<bitsVal value configs per condition; the trailing 1.0 is presumably the initial fill value -- confirm.
+  static float& setPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
+    { if(!bModel){aaaaaPotentials.init(cardGlb,cardOff,cardSh,cardCnd,1<<bitsVal,1.0); bModel=true;}
+      return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
+  static void updateObservCliques ( const X1&, const X2& ) ;
+  // Static input / output methods...
+  static bool readModelFields ( char*[], int ) ;
+  // Extraction methods...
+  Prob getProb ( const X1&, const X2& ) const ;
+  // Input / output methods...
+  void writeObservCliqueConfigs ( FILE*, int, const char*, const X1&, const X2&, bool ) const ;
+};
+////////////////////////////////////////////////////////////////////////////////
+// Out-of-class definitions of the static members: the model starts out flagged as undefined,
+// every dimension starts at zero, and the potentials table is default-constructed.
+template <class Y,class X1,class X2> bool               CRF3DModeledRV<Y,X1,X2>::bModel      = false;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::cardGlb     = 0;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::cardOff     = 0;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::cardSh      = 0;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::cardCnd     = 0;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::bitsVal     = 0;
+template <class Y,class X1,class X2> int                CRF3DModeledRV<Y,X1,X2>::bitsValSite = 0;
+template <class Y,class X1,class X2> SafeArray5D<Id<int>,int,int,int,int,float> CRF3DModeledRV<Y,X1,X2>::aaaaaPotentials;
+/* template <class Y,class X1,class X2> SafeArray3D<int>   CRF3DModeledRV<Y,X1,X2>::aaaCnds; */
+////////////////////////////////////////////////////////////////////////////////
+// Returns prob/probZ for this object's value given x1 and x2, where
+//   prob  = product, over every offset and shape, of the potential selected by this object's own
+//           value clique config, and
+//   probZ = sum of the last trellis column, the trellis being filled left to right over all
+//           possible value configs.
+// NOTE(review): with cardOff==0 the final loop reads trellis column cardOff-1 = -1; presumably
+// init() is always called with a positive offset count -- confirm.
+template <class Y,class X1,class X2>
+Prob CRF3DModeledRV<Y,X1,X2>::getProb( const X1& x1, const X2& x2 ) const {
+  // aaCnds caches the condition clique config per (offset, shape).  aaTrell has one column per
+  // offset and one cell per value config; the 0.0 is presumably the initial fill value, which the
+  // += accumulation below relies on -- confirm.
+  SafeArray2D<int,int,int>    aaCnds  ( cardOff, cardSh ) ;
+  SafeArray2D<int,int,double> aaTrell ( cardOff, 1<<bitsVal, 0.0 ) ;
+  double prob = 1.0;
+  // For each offset...
+  for ( int off=0; off<cardOff; off++ )
+    // For each shape...
+    for ( int sh=0; sh<cardSh; sh++ )
+      // Update clique config for condition...
+      aaCnds.set(off,sh) = Y::getCliqueConfigCnd ( x1, x2, off, sh ) ;
+  // For each offset...
+  for ( int off=0; off<cardOff; off++ ) {
+    // For each shape...
+    for ( int sh=0; sh<cardSh; sh++ )
+      // Multiply phi for feature (that is, exp lambda) into numerator...
+      prob *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
+                           aaCnds.get(off,sh),
+                           Y::getCliqueConfigVal(off,sh));
+    // If first column in trellis...
+    if ( 0==off ) {
+      // For each trellis value...
+      for ( int configVal=0; configVal<(1<<bitsVal); configVal++ ) {
+        // Add weight of each shape at current offset...
+        // (the weights are combined by multiplication; note prod is float while the trellis is double)
+        float prod=1.0;
+        for ( int sh=0; sh<cardSh; sh++ )
+          prod *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
+                               aaCnds.get(off,sh),
+                               configVal) ;
+        aaTrell.set(off,configVal) = prod ;
+      }
+    // If subsequent column in trellis...
+    } else {
+      // For each trellis transition (overlap = all but one)...
+      // A right-hand config is the overlap bits shifted up with the new site's bits below them;
+      // a left-hand config is the outgoing site's bits placed above the same overlap bits.
+      for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
+        for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
+          int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
+          // For each possible preceding trellis node...
+          for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
+            int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
+            // Add product of result and previous trellis cell to current trellis cell...
+            // (only the previous cell is summed here; the shape weights are multiplied in once, after this loop)
+            aaTrell.set(off,configRghtVal) += aaTrell.get(off-1,configLeftVal) ;
+          }
+          // Multiply weight of each shape...
+          float prod=1.0;
+          for ( int sh=0; sh<cardSh; sh++ )
+            prod *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
+                                 aaCnds.get(off,sh),
+                                 configRghtVal);
+          aaTrell.set(off,configRghtVal) *= prod;
+        }
+    }
+  } // END EACH OFFSET
+  // Calc total prob mass: sum of all possible forward scores in trellis...
+  double probZ = 0.0;
+  for ( int i=0; i<(1<<bitsVal); i++ )
+    probZ += aaTrell.get(cardOff-1,i);
+  // Normalize prob by total prob mass...
+  return prob/probZ;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Loads one potential from a tokenised model line.  Only seven-field records are accepted:
+// fields 1..5 select the table cell (global dependency, offset, shape, condition config, value
+// config) and field 6 is the weight, which is stored exponentiated.  Field 0 is not examined.
+// Returns true when the record was consumed, false when the field count does not match.
+template <class Y,class X1,class X2>
+bool CRF3DModeledRV<Y,X1,X2>::readModelFields ( char* aps[], int numFields ) {
+  if ( numFields != 7 ) return false;
+  const int off       = atoi(aps[2]);   // offsets
+  const int sh        = atoi(aps[3]);   // shapes
+  const int configCnd = atoi(aps[4]);   // cnds
+  const int configVal = atoi(aps[5]);   // vals
+  setPotential ( X1(string(aps[1])), off, sh, configCnd, configVal ) = exp(atof(aps[6])) ;
+  return true;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Writes one line describing the observed clique configs to pf:
+//   "<frame, 4 digits> <psMdl> <cnd chars for shape 0>,<cnd chars for shape 1>,... : <val chars or _>"
+// Each condition/value config is printed as a single character via intToTetraHex, one per
+// offset.  Values are printed (for shape 0) only when bObsVal is set; otherwise "_" is printed.
+// Every piece of the line goes to pf; the separators and newline previously went to stdout,
+// which split the record across two streams whenever pf was not stdout.
+template <class Y,class X1,class X2>
+void CRF3DModeledRV<Y,X1,X2>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
+                                                         const X1& x1, const X2& x2, bool bObsVal ) const {
+  fprintf ( pf, "%04d> %s ", frame, psMdl );
+  // For each shape (feature slope)...
+  for ( int sh=0; sh<cardSh; sh++ ) {
+    // Print clique config condition at each offset...
+    for ( int off=0; off<cardOff; off++ )
+      fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigCnd(x1,x2,off,sh)) );
+    if (sh<cardSh-1) fprintf ( pf, "," );   // commas between shapes
+  }
+  fprintf ( pf, " : " );  // cond/val delimiter
+  // Print clique config value at each offset...
+  if ( bObsVal )
+    for ( int off=0; off<cardOff; off++ )
+      fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigVal(off,0)) );
+  else fprintf ( pf, "_" ) ;
+  fprintf ( pf, "\n" );
+}
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  CRF4DModeledRV<Y,X1,X2,X3>
+//
+////////////////////////////////////////////////////////////////////////////////
+// Random variable of type Y whose probability given three conditioning variables (X1, X2, X3)
+// is computed from a table of potentials.  The table and its dimensions are static, so they are
+// shared by every object of the same instantiation.
+// Usage order implied by the code: init() records the dimensions, the first setPotential() call
+// allocates the table from them, and getPotential()/getProb() may be used only after that.
+template <class Y,class X1,class X2,class X3>
+class CRF4DModeledRV : public Y {
+ private:
+  // Static data members...
+  static bool               bModel;      // whether model defined yet
+  static int                cardGlb;     // global dependencies (used in all potentials)
+  static int                cardOff;     // offset positions in site var sequence
+  static int                cardSh;      // clique shapes at each offset
+  static int                cardCnd;     // possible condition clique configs incl non-site vars in high bits
+  static int                bitsVal;     // size in bits of val part of clique config
+  static int                bitsValSite; // size in bits of each site var in val clique config
+  static SafeArray5D<Id<int>,int,int,int,int,float> aaaaaPotentials;  // the model
+/*   static SafeArray3D<int>   aaaCnds;          // calc features only once per frame */
+ public:
+  // Static extraction methods...
+  // Looks up one potential; asserts that the table has been allocated by setPotential().
+  static const float& getPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
+    { assert(bModel); return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
+  // Static specification methods...
+  // Records the table dimensions: g,o,s,c are the four cardinalities, v and b the two bit widths.
+  static void   init         ( int g, int o, int s, int c, int v, int b )
+    { cardGlb=g; cardOff=o; cardSh=s; cardCnd=c; bitsVal=v; bitsValSite=b; }
+  // Returns a writable reference to one potential.  On the first call the table is allocated with
+  // 1<<bitsVal value configs per condition; the trailing 1.0 is presumably the initial fill value -- confirm.
+  static float& setPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
+    { if(!bModel){aaaaaPotentials.init(cardGlb,cardOff,cardSh,cardCnd,1<<bitsVal,1.0); bModel=true;}
+      return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
+  // Static input / output methods...
+  static bool readModelFields ( char*[], int ) ;
+  // Extraction methods...
+  Prob getProb ( const X1&, const X2&, const X3& ) const ;
+  // Input / output methods...
+  void writeObservCliqueConfigs ( FILE*, int, const char*, const X1&, const X2&, const X3&, bool ) const ;
+};
+////////////////////////////////////////////////////////////////////////////////
+// Out-of-class definitions of the static members: the model starts out flagged as undefined,
+// every dimension starts at zero, and the potentials table is default-constructed.
+template <class Y,class X1,class X2,class X3> bool CRF4DModeledRV<Y,X1,X2,X3>::bModel   = false;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::cardGlb     = 0;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::cardOff     = 0;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::cardSh      = 0;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::cardCnd     = 0;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::bitsVal     = 0;
+template <class Y,class X1,class X2,class X3> int  CRF4DModeledRV<Y,X1,X2,X3>::bitsValSite = 0;
+template <class Y,class X1,class X2,class X3> SafeArray5D<Id<int>,int,int,int,int,float>
+               CRF4DModeledRV<Y,X1,X2,X3>::aaaaaPotentials;
+/* template <class Y,class X1,class X2> SafeArray3D<int>   CRF4DModeledRV<Y,X1,X2>::aaaCnds; */
+////////////////////////////////////////////////////////////////////////////////
+// Returns prob/probZ for this object's value given x1, x2 and x3, where
+//   prob  = product, over every offset and shape, of the potential selected by this object's own
+//           value clique config, and
+//   probZ = sum of the last trellis column, the trellis being filled left to right over all
+//           possible value configs.
+// NOTE(review): with cardOff==0 the final loop reads trellis column cardOff-1 = -1; presumably
+// init() is always called with a positive offset count -- confirm.
+template <class Y,class X1,class X2,class X3>
+Prob CRF4DModeledRV<Y,X1,X2,X3>::getProb( const X1& x1, const X2& x2, const X3& x3 ) const {
+  // aaCnds caches the condition clique config per (offset, shape).  aaTrell has one column per
+  // offset and one cell per value config; the 0.0 is presumably the initial fill value, which the
+  // += accumulation below relies on -- confirm.
+  SafeArray2D<int,int,int>    aaCnds  ( cardOff, cardSh ) ;
+  SafeArray2D<int,int,double> aaTrell ( cardOff, 1<<bitsVal, 0.0 ) ;
+  double prob = 1.0;
+  // For each offset...
+  for ( int off=0; off<cardOff; off++ )
+    // For each shape...
+    for ( int sh=0; sh<cardSh; sh++ )
+      // Update clique config for condition...
+      aaCnds.set(off,sh) = Y::getCliqueConfigCnd ( x1, x2, x3, off, sh ) ;
+  // For each offset...
+  for ( int off=0; off<cardOff; off++ ) {
+    // For each shape...
+    for ( int sh=0; sh<cardSh; sh++ )
+      // Multiply phi for feature (that is, exp lambda) into numerator...
+      prob *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
+                           aaCnds.get(off,sh),
+                           Y::getCliqueConfigVal(off,sh));
+    // If first column in trellis...
+    if ( 0==off ) {
+      // For each trellis value...
+      for ( int configVal=0; configVal<(1<<bitsVal); configVal++ ) {
+        // Add weight of each shape at current offset...
+        // (the weights are combined by multiplication; note prod is float while the trellis is double)
+        float prod=1.0;
+        for ( int sh=0; sh<cardSh; sh++ )
+          prod *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
+                               aaCnds.get(off,sh),
+                               configVal) ;
+        aaTrell.set(off,configVal) = prod ;
+      }
+    // If subsequent column in trellis...
+    } else {
+      // For each trellis transition (overlap = all but one)...
+      // A right-hand config is the overlap bits shifted up with the new site's bits below them;
+      // a left-hand config is the outgoing site's bits placed above the same overlap bits.
+      for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
+        for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
+          int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
+          // For each possible preceding trellis node...
+          for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
+            int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
+            // Add product of result and previous trellis cell to current trellis cell...
+            // (only the previous cell is summed here; the shape weights are multiplied in once, after this loop)
+            aaTrell.set(off,configRghtVal) += aaTrell.get(off-1,configLeftVal) ;
+          }
+          // Multiply weight of each shape...
+          float prod=1.0;
+          for ( int sh=0; sh<cardSh; sh++ )
+            prod *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
+                                 aaCnds.get(off,sh),
+                                 configRghtVal);
+          aaTrell.set(off,configRghtVal) *= prod;
+        }
+    }
+  } // END EACH OFFSET
+  // Calc total prob mass: sum of all possible forward scores in trellis...
+  double probZ = 0.0;
+  for ( int i=0; i<(1<<bitsVal); i++ )
+    probZ += aaTrell.get(cardOff-1,i);
+  // Normalize prob by total prob mass...
+  return prob/probZ;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Parses one seven-field model line and stores exp(weight) as the potential for
+// the given (global, offset, shape, condition, value) key.  Returns false, leaving
+// the model untouched, when the line does not have exactly seven fields.
+template <class Y,class X1,class X2,class X3>
+bool CRF4DModeledRV<Y,X1,X2,X3>::readModelFields ( char* aps[], int numFields ) {
+  // Not a potential line...
+  if ( numFields != 7 ) return false;
+  // Field 6 holds lambda; the stored potential is exp(lambda)...
+  const double dPhi = exp ( atof(aps[6]) ) ;
+  setPotential ( X1(string(aps[1])),      // globals
+                 atoi(aps[2]),            // offsets
+                 atoi(aps[3]),            // shapes
+                 atoi(aps[4]),            // cnds
+                 atoi(aps[5]) ) = dPhi ;  // vals
+  return true;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Writes one observation record to pf:
+//   "<frame>> <psMdl> <cnds of shape 0>,<cnds of shape 1>,... : <vals>\n"
+// where each clique config is printed as one intToTetraHex character per offset.
+// When bObsVal is false the value part is written as "_".
+// All parts of the record go to pf, so the line is complete in whichever stream
+// the caller supplies.
+template <class Y,class X1,class X2, class X3>
+void CRF4DModeledRV<Y,X1,X2,X3>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
+                                                            const X1& x1, const X2& x2,
+                                                            const X3& x3, bool bObsVal ) const {
+  fprintf ( pf, "%04d> %s ", frame, psMdl );
+  // For each shape (feature slope)...
+  for ( int sh=0; sh<cardSh; sh++ ) {
+    // Print clique config condition at each offset...
+    for ( int off=0; off<cardOff; off++ )
+      fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigCnd(x1,x2,x3,off,sh)) );
+    if (sh<cardSh-1) fprintf ( pf, "," );   // commas between shapes
+  }
+  fprintf ( pf, " : " );  // cond/val delimiter
+  // Print clique config value at each offset...
+  if ( bObsVal )
+    for ( int off=0; off<cardOff; off++ )
+      fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigVal(off,0)) );
+  else fprintf ( pf, "_" ) ;
+  fprintf ( pf, "\n" );
+}
+#endif //_NL_CRF__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h ADDED Viewed

	@@ -0,0 +1,479 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_DTREE_CONTIN__
+#define _NL_DTREE_CONTIN__
+#include "nl-dtree.h"
+typedef double Wt;  // Weight type used for the threshold, separator and sum-of-squares weights of ContDecisNode
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Cont DTree Node
+//
+////////////////////////////////////////////////////////////////////////////////
+// Decision node holding a separator over continuous attributes: a threshold,
+// one weight per attribute, and one weight on a sum-of-squares coordinate.
+template<class Y, class P>
+class ContDecisNode : public DecisNode<Y,P> {
+ private:
+  // Separator parameters...
+  Wt        wThreshold;     // "w_0": threshold weight
+  map<A,Wt> awSeparator;    // per-attribute hyperplane weights (absent entries read as 0.0)
+  Wt        wSumSqr;        // weight on the sum-of-squares (parabolic) coordinate
+ public:
+  // Construction: both scalar weights start at zero, with no attribute weights stored...
+  ContDecisNode ( ) : wThreshold(0.0), wSumSqr(0.0) { }
+  // Mutable access...
+  Wt& setWt   ( )           { return wThreshold; }
+  Wt& setWt   ( const A a ) {
+    // operator[] creates a missing entry value-initialized to 0.0 and otherwise
+    // returns the existing one, which is the behavior wanted here...
+    return awSeparator[a];
+  }
+  Wt& setSsWt ( )           { return wSumSqr; }
+  // Read-only access...
+  const Wt getWt   ( )           const { return wThreshold; }
+  const Wt getWt   ( const A a ) const {
+    // Single lookup; an attribute never set has weight zero...
+    map<A,Wt>::const_iterator it = awSeparator.find(a);
+    if ( it == awSeparator.end() ) return 0.0;
+    return it->second;
+  }
+  const Wt getSsWt ( )           const { return wSumSqr; }
+};
+////////////////////////////////////////////////////////////////////////////////
+//
+//  ContDTree Model
+//
+////////////////////////////////////////////////////////////////////////////////
+// Binary decision tree over continuous attribute vectors X.  Each nonterminal
+// node carries its own separator (threshold, per-attribute weights and a weight
+// on the mean of squared attributes); each terminal node carries a distribution
+// over Y.
+template<class Y, class X, class P>
+class ContDTree2DModel : public Generic2DModel<Y,X,P>, public Tree<ContDecisNode<Y,P> > {
+ public:
+  // Downcasts (safe b/c no new data)...
+  ContDTree2DModel<Y,X,P>&       setLeft()        { return static_cast<ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::setLeft()  ); }
+  ContDTree2DModel<Y,X,P>&       setRight()       { return static_cast<ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::setRight() ); }
+  const ContDTree2DModel<Y,X,P>& getLeft()  const { return static_cast<const ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::getLeft()  ); }
+  const ContDTree2DModel<Y,X,P>& getRight() const { return static_cast<const ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::getRight() ); }
+  // Extraction methods...
+  // Descends from this node to a terminal node, going right when the weighted sum
+  // at the CURRENT node is positive and left otherwise, then returns that
+  // terminal's probability of y.
+  const P getProb ( const Y y, const X& x ) const {
+    // The sum-of-squares coordinate depends only on x, so calc it once...
+    double sumsqr=0.0;
+    for(A a;a<X::getSize();a.setNext()) sumsqr += pow(x.get(a.toInt()),2.0) / X::getSize();
+    const Tree<ContDecisNode<Y,P> >* ptr = this;
+    while ( !ptr->isTerm() ) {
+      // Use the separator stored at the node being visited (not the root's)...
+      Wt wtdavg = -ptr->getWt();
+      for(A a;a<X::getSize();a.setNext()) wtdavg += ptr->getWt(a) * x.get(a.toInt());
+      wtdavg += ptr->getSsWt() * sumsqr;
+      ptr = (wtdavg>0.0) ? &ptr->getRight() : &ptr->getLeft();
+    }
+    return ptr->getProb(y);
+  }
+  // Input / output methods...
+  bool readFields  ( char*[], int ) ;
+  // Writes the whole tree rooted here, one line per node weight vector / leaf value.
+  void writeFields ( FILE* pf, string sPref ) {
+    char psPath[1000] = "";
+    write ( pf, sPref.c_str(), psPath, 0 );
+  }
+  // Recursive writer.  psPath is a scratch buffer holding the 0/1 path to this
+  // node in its first iEnd characters; each level appends one character.
+  void write ( FILE* pf, const char psPrefix[], char psPath[], int iEnd ) const {
+    if (Tree<ContDecisNode<Y,P> >::isTerm()) {
+      // Terminal: one "prefix [path] : y = prob" line per value of Y...
+      Y y;
+      psPath[iEnd]='\0';
+      for ( bool b=y.setFirst(); b; b=y.setNext() )
+        { fprintf(pf, "%s [%s] : ", psPrefix, psPath); y.write(pf); fprintf(pf, " = %f\n", (double)Tree<ContDecisNode<Y,P> >::getProb(y)); }
+    } else {
+      // Nonterminal: "prefix [path] = w0_w1_..._wN_wSumSqr", then both subtrees...
+      psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] =", psPrefix, psPath );
+      fprintf ( pf, " %f", Tree<ContDecisNode<Y,P> >::getWt() );
+      for(A a;a<X::getSize();a.setNext()) fprintf ( pf, "_%f", Tree<ContDecisNode<Y,P> >::getWt(a) );
+      fprintf ( pf, "_%f", Tree<ContDecisNode<Y,P> >::getSsWt() );
+      fprintf ( pf, "\n" );
+      psPath[iEnd]='0'; psPath[iEnd+1]='\0'; getLeft().write  ( pf, psPrefix, psPath, iEnd+1 );
+      psPath[iEnd]='1'; psPath[iEnd+1]='\0'; getRight().write ( pf, psPrefix, psPath, iEnd+1 );
+    }
+  }
+};
+////////////////////
+// Reads one model line into the tree.
+//   3 fields: aps[1]="[path]", aps[2]="w0_w1_..._wN_wSumSqr"  -> separator of the node at path
+//   4 fields: aps[1]="[path]", aps[2]=y value, aps[3]=prob    -> leaf probability at path
+// Nodes along the path are created as needed.  Returns false if the field count
+// is neither 3 nor 4, or if the weight field has too few "_"-separated values.
+template <class Y,class X, class P>
+bool ContDTree2DModel<Y,X,P>::readFields ( char* aps[], int numFields ) {
+  if ( /*aps[0]==sId &&*/ 3!=numFields && 4!=numFields ) return false;
+  //fprintf(stderr,"%s,%d\n",aps[3],numFields);
+  const size_t lenPath = strlen(aps[1]);
+  assert ( lenPath>=2 && '['==aps[1][0] && ']'==aps[1][lenPath-1] );
+  // Start at root...
+  Tree<ContDecisNode<Y,P> >* ptr = this;
+  assert(ptr);
+  // Find appropriate node, creating nodes as necessary (bound written as i+1<len
+  // so a too-short path cannot wrap around when asserts are compiled out)...
+  for ( size_t i=1; i+1<lenPath; i++ ) {
+    assert ( '0'==aps[1][i] || '1'==aps[1][i] );
+    ptr = ( ('0'==aps[1][i]) ? &ptr->setLeft() : &ptr->setRight() ) ;
+    assert(ptr);
+  }
+  // Specify separator (at nonterminal) or distribution (at terminal)...
+  if ( 3==numFields ) {
+    // Weights belong to the node reached via the path, so set them through ptr...
+    char* psT=NULL;
+    const char* psTok = strtok_r(aps[2],"_",&psT);
+    if ( NULL==psTok ) return false;
+    ptr->setWt() = atof(psTok);
+    for(A a;a<X::getSize();a.setNext()) {
+      psTok = strtok_r(NULL,"_",&psT);
+      if ( NULL==psTok ) return false;
+      ptr->setWt(a) = atof(psTok);
+    }
+    psTok = strtok_r(NULL,"_",&psT);
+    if ( NULL==psTok ) return false;
+    ptr->setSsWt() = atof(psTok);
+  }
+  else ptr->setProb(aps[2]) = atof(aps[3]);
+  return true;
+}
+////////////////////////////////////////////////////////////////////////////////
+// A family of continuous decision trees, one ContDTree2DModel per value of X1.
+template<class Y, class X1,class X2, class P>
+class ContDTree3DModel : public Generic3DModel<Y,X1,X2,P> {
+ private:
+  // Model identifier and the per-X1 trees...
+  string                               sId;
+  SimpleHash<X1,ContDTree2DModel<Y,X2,P> > aqt;
+ public:
+  // Construction...
+  ContDTree3DModel ( )                 { }
+  ContDTree3DModel ( const string& s ) : sId(s) { }
+  // Mutable access to the tree for x1...
+  ContDTree2DModel<Y,X2,P>& setTree ( const X1& x1 ) { return aqt.set(x1); }
+  // Enumeration of Y and probability lookup (delegated to the tree for x1)...
+  bool setFirst ( Y& y )                                  const { return y.setFirst(); }
+  bool setNext  ( Y& y )                                  const { return y.setNext(); }
+  P    getProb  ( const Y y, const X1& x1, const X2& x2 ) const { return aqt.get(x1).getProb(y,x2); }
+  // Input / output methods...
+  bool readFields  ( char*[], int ) ;
+  // Writes the tree of every X1 value, each prefixed by "<sPref> <x1>"...
+  void writeFields ( FILE* pf, string sPref ) {
+    char psPath[1000] = "";
+    X1 x1;
+    bool bMore = x1.setFirst();
+    while ( bMore ) {
+      const string sTreePref = sPref + " " + x1.getString();
+      aqt.get(x1).write ( pf, sTreePref.c_str(), psPath, 0 );
+      bMore = x1.setNext();
+    }
+  }
+};
+////////////////////
+// Reads one model line into the tree selected by aps[1].
+//   4 fields: aps[2]="[path]", aps[3]="w0_w1_..._wN_wSumSqr"  -> separator of the node at path
+//   5 fields: aps[2]="[path]", aps[3]=y value, aps[4]=prob    -> leaf probability at path
+// Nodes along the path are created as needed.  Returns false if the field count
+// is neither 4 nor 5, or if the weight field has too few "_"-separated values.
+template <class Y,class X1,class X2, class P>
+bool ContDTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
+  if ( /*aps[0]==sId &&*/ 4!=numFields && 5!=numFields ) return false;
+  //fprintf(stderr,"%s,%d\n",aps[3],numFields);
+  const size_t lenPath = strlen(aps[2]);
+  assert ( lenPath>=2 && '['==aps[2][0] && ']'==aps[2][lenPath-1] );
+  // Start at root...
+  Tree<ContDecisNode<Y,P> >* ptr = &aqt.set(aps[1]);
+  assert(ptr);
+  // Find appropriate node, creating nodes as necessary (bound written as i+1<len
+  // so a too-short path cannot wrap around when asserts are compiled out)...
+  for ( size_t i=1; i+1<lenPath; i++ ) {
+    assert ( '0'==aps[2][i] || '1'==aps[2][i] );
+    ptr = ( ('0'==aps[2][i]) ? &ptr->setLeft() : &ptr->setRight() ) ;
+    assert(ptr);
+  }
+  // Specify separator (at nonterminal) or distribution (at terminal)...
+  if ( 4==numFields ) {
+    // strtok_r yields NULL once the field runs out of tokens; never hand that to atof...
+    char* psT=NULL;
+    const char* psTok = strtok_r(aps[3],"_",&psT);
+    if ( NULL==psTok ) return false;
+    ptr->setWt() = atof(psTok);
+    for(A a;a<X2::getSize();a.setNext()) {
+      psTok = strtok_r(NULL,"_",&psT);
+      if ( NULL==psTok ) return false;
+      ptr->setWt(a) = atof(psTok);
+    }
+    psTok = strtok_r(NULL,"_",&psT);
+    if ( NULL==psTok ) return false;
+    ptr->setSsWt() = atof(psTok);
+  }
+  else ptr->setProb(aps[3]) = atof(aps[4]);
+  return true;
+}
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Trainable ContDTree Model
+//
+////////////////////////////////////////////////////////////////////////////////
+// ContDTree2DModel that can also collect (X,Y) training pairs and fit itself to them.
+template<class Y, class X, class P>
+class TrainableContDTree2DModel : public ContDTree2DModel<Y,X,P> {
+ private:
+  // Training pairs gathered by readData...
+  List<Joint2DRV<X,Y> > lxy;
+ public:
+  // Child accessors downcast to the trainable type...
+  // NOTE(review): this class adds the lxy member on top of its base -- confirm the
+  // children really are TrainableContDTree2DModel objects before relying on these casts.
+  TrainableContDTree2DModel<Y,X,P>& setLeft() {
+    return static_cast<TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::setLeft());
+  }
+  TrainableContDTree2DModel<Y,X,P>& setRight() {
+    return static_cast<TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::setRight());
+  }
+  const TrainableContDTree2DModel<Y,X,P>& getLeft() const {
+    return static_cast<const TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::getLeft());
+  }
+  const TrainableContDTree2DModel<Y,X,P>& getRight() const {
+    return static_cast<const TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::getRight());
+  }
+  // Training: either on a caller-supplied list or on the data read so far...
+  void train ( List<Joint2DRV<X,Y> >&, const double ) ;
+  void train ( const double d )                        { train(lxy,d); }
+  // Input: a three-field line adds the pair (X(vs[1]),Y(vs[2])); anything else is rejected...
+  bool readData ( char* vs[], int numFields ) {
+    if ( 3!=numFields ) return false;
+    lxy.add() = Joint2DRV<X,Y> ( X(vs[1]), Y(vs[2]) );
+    return true;
+  }
+};
+////////////////////
+template<class Y, class X, class P>
+void  TrainableContDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, const double DTREE_CHISQR_LIMIT ) {
+  /* Trains this node on the data in lxy and, if the resulting split is
+   * significant, recursively trains both children:
+   *   1. estimate the distribution over Y from the data (modelY);
+   *   2. run 1000 epochs of per-datum weight updates, using a sigmoid of the
+   *      weighted attribute sum plus a sum-of-squares coordinate;
+   *   3. move every datum out of lxy into a left (0) or right (1) list
+   *      according to the sign of its weighted sum, leaving lxy empty;
+   *   4. compute a chi-square statistic of the split against modelY and either
+   *      recurse (chisqr > DTREE_CHISQR_LIMIT) or store modelY as this leaf.
+   * Place to store counts... */
+  //CPT3DModel<A,B,Y,double> aaaCounts;  // hash was MUCH slower!!
+  SafeArray2D<B,Y,double> aaCounts ( 2, Y::getDomain().getSize(), 0.0 );
+  double dTot = lxy.getCard();
+  CPT1DModel<Y,double> modelY;
+//  if (11613==dTot) {  //if (12940<=dTot && dTot<12950) {  //if ( 20779==dTot ) { //// (bU)
+//    ListedObject<Joint2DRV<X,Y> >* pxy;
+//    for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
+//      fprintf(stdout,"Ohist "); pxy->getSub1().write(stdout); fprintf(stdout," "); pxy->getSub2().write(stdout); fprintf(stdout,"\n");
+//    }
+//    fprintf(stderr,"PRINTED\n");
+//  }
+  // For each datum in list...
+  ListedObject<Joint2DRV<X,Y> >* pxy;
+  for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
+    // Count Ys...
+    modelY.setProb(pxy->getSub2())++;
+  }
+  modelY.normalize();
+  double prRarest = (modelY.getProb("1")<modelY.getProb("0")) ? modelY.getProb("1") : modelY.getProb("0");  // NOTE(review): referenced only from commented-out code in this function
+//  // Set separator to pass through center of positives...
+//  for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) )
+//    for ( A a; a<X::getSize(); a.setNext() ) {
+//      if ( Y("1")==pxy->getSub2() ) {
+//        Tree<ContDecisNode<Y,P> >::setWt()   -= (pxy->getSub1().get(a.toInt())+pow(pxy->getSub1().get(a.toInt()),2.0))/dTot; //// (dTot*prRarest);
+//        Tree<ContDecisNode<Y,P> >::setWt(a)  += pxy->getSub1().get(a.toInt())/dTot;  //// (dTot*prRarest);
+//        Tree<ContDecisNode<Y,P> >::setSsWt() += pow(pxy->getSub1().get(a.toInt()),2.0)/dTot; //// (dTot*prRarest);
+//      }
+//    }
+  // Initialize the threshold weight to 1.0 (the centroid-based initialization above is commented out)...
+  Tree<ContDecisNode<Y,P> >::setWt() = 1.0;
+  // For each gradient descent epoch...
+  for ( int epoch=1; epoch<=1000; epoch++ ) {
+    double dCtr=0.0;
+    double dPos     = 0.0;
+    ListedObject<Joint2DRV<X,Y> >* pxy;  // shadows the pxy declared before the epoch loop
+    if(OUTPUT_NOISY) {
+      double lgprTot = 0.0;  // stays 0.0: the loop that accumulated it is commented out below
+//      // For each datum in list...
+//      for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
+//        // Calc tot prob...
+//        double wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
+//        for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
+//        // Calc est val of Y using sigmoid transfer fn...
+//        P prY = 1.0 / ( 1.0 + exp(-wtdavg) );
+//        if(epoch>1)fprintf(stderr,"    %f %f\n",(double)wtdavg,(double)prY);
+//        lgprTot += (pxy->getSub2()==1) ? log(prY) : log(1.0-prY) ;
+//      }
+      if (OUTPUT_NOISY && epoch%10==0) {
+        // Report...
+        fprintf(stderr,"  tot=%08d totlogprob=%g separator=%f",(int)dTot,lgprTot,Tree<ContDecisNode<Y,P> >::getWt());
+        for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
+        fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
+        fprintf(stderr,"\n");
+      }
+    }
+    fprintf(stderr,"  --- epoch %d ---\n",epoch);  // printed every epoch, regardless of OUTPUT_NOISY
+    // For each datum in list...
+    for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
+//    // Use random subset of more frequent Y val so total wts for 1 and 0 are equal (CODE REVIEW: should be subset nearest to centroid of fewer)...
+//    if ( double(rand())/double(RAND_MAX) < prRarest/modelY.getProb(pxy->getSub2()) ) {
+        dCtr++;
+        double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); //
+        // Weight deltas for next epoch...
+        Wt wDelta = 0.0;
+        SafeArray1D<A,Wt> awDeltas (X::getSize(),0.0);
+        Wt wSsDelta = 0.0;
+        // Calc sum of squares for convolution coordinate...
+        double sumsqr=0.0;
+        for(A a;a<X::getSize();a.setNext()) sumsqr += pow(pxy->getSub1().get(a.toInt()),2.0) / X::getSize();
+        // Calc wtd avg of feats...
+        double wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
+        for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
+        wtdavg += Tree<ContDecisNode<Y,P> >::getSsWt() * sumsqr;
+        //// Calc est val of Y using sigmoid transfer fn...
+        //P prY = ( ( ( (1.0/(1.0+exp(-wtdavg))) - .5 ) * exp(-wtdavg) ) + .5 ) ;
+        // Calc est val of Y using sigmoid transfer fn...
+        P prY = 1.0 / ( 1.0 + exp(-wtdavg) );
+        // Calc deltas for each feature/attribute/dimension...
+        double dEachWt  = 1.0/dTot;  // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) );  // 1.0/(dTot*prRarest*2.0); //
+        wDelta += dEachWt * -1 * ( prY - P(double(pxy->getSub2().toInt())) );
+        for ( A a; a<X::getSize(); a.setNext() )
+          awDeltas.set(a) += dEachWt * pxy->getSub1().get(a.toInt()) * ( prY - P(double(pxy->getSub2().toInt())) );
+        wSsDelta += dEachWt * sumsqr * ( prY - P(double(pxy->getSub2().toInt())) );
+        // Update weights by deltas (the threshold update is commented out, so wDelta is only reported, never applied)...
+        //Tree<ContDecisNode<Y,P> >::setWt() -= gamma * wDelta;
+        ////double reldeduction = wDelta/Tree<ContDecisNode<Y,P> >::getWt();
+        for ( A a; a<X::getSize(); a.setNext() )
+          Tree<ContDecisNode<Y,P> >::setWt(a) -= gamma*awDeltas.get(a); //+ changeratio/Tree<ContDecisNode<Y,P> >::getWt(a);
+        Tree<ContDecisNode<Y,P> >::setSsWt()  -= gamma*wSsDelta;        //+ changeratio/Tree<ContDecisNode<Y,P> >::getSsWt();
+        dPos+=prY;  //      if (prY>0.5) dPos++;
+        // Report the datum, its estimate and the deltas...
+        if(OUTPUT_VERYNOISY) {
+          fprintf(stderr,"    A tot=%08d vals = %f",(int)dTot,-1.00);
+          for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",pxy->getSub1().get(a.toInt()));
+          fprintf(stderr,"_%f",sumsqr);
+          fprintf(stderr,"  --> %f %f (gold: %f)\n",wtdavg,(double)prY,double(pxy->getSub2().toInt()));
+          fprintf(stderr,"    D tot=%08d delt = %f",(int)dTot,wDelta);
+          for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",awDeltas.get(a));
+          fprintf(stderr,"_%f",wSsDelta);
+          fprintf(stderr,"\n");
+        }
+        // Report the separator after the update...
+        if(OUTPUT_VERYNOISY) {
+          fprintf(stderr,"   _S tot=%08d sepr = %f",(int)dTot,Tree<ContDecisNode<Y,P> >::getWt());
+          for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
+          fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
+          fprintf(stderr,"\n");
+        }
+//    }
+    } // end loop pxy
+/*     // Report... */
+/*     if(OUTPUT_NOISY) { */
+/*       fprintf(stderr,"  tot:%08d +:%08d -:%08d\n",(int)dTot,(int)dPos,(int)(dTot-dPos)); */
+/*       fprintf(stderr,"  E tot=%08d separator=%f",(int)dTot,wDelta); */
+/*       for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",awDeltas.get(a)); */
+/*       fprintf(stderr,"\n"); */
+/*     } */
+  } // end loop epoch
+  // Split list into each 0/1 child of this node...
+  List<Joint2DRV<X,Y> > alxy[2];
+  int                   actr[2] = {0,0};
+  // For each datum in list (each one is copied to a child list and then popped, so lxy ends up empty)...
+  while ( !lxy.isEmpty() ) {
+    Joint2DRV<X,Y>* pxy = lxy.getFirst();
+    double sumsqr=0.0;
+    for(A a;a<X::getSize();a.setNext()) sumsqr += pow(pxy->getSub1().get(a.toInt()),2.0) / X::getSize();
+    Wt wtdavg=-Tree<ContDecisNode<Y,P> >::getWt();
+    for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
+    wtdavg += Tree<ContDecisNode<Y,P> >::getSsWt() * sumsqr;
+    alxy[(wtdavg>0.0)?1:0].add() = *pxy;
+    aaCounts.set((wtdavg>0.0)?1:0,pxy->getSub2())++;
+    actr[(wtdavg>0.0)?1:0]++;
+    if(OUTPUT_VERYNOISY){fprintf(stderr,"classify "); pxy->write(stderr); fprintf(stderr," wtdavg=%f class=%d\n",wtdavg,(wtdavg>0.0)?1:0);}
+    lxy.pop();
+  }
+  // Calc chisqr: observed per-child counts of each Y against child size times modelY...
+  double chisqr = 0.0;
+  fprintf(stderr,"    tot=%08d split=",(int)dTot);
+  for ( int b=0; b<2; b++ ) {
+    Y y;
+    for ( bool by=y.setFirst(); by; by=y.setNext() ) {
+      fprintf(stderr," (%s->%d:%f)",y.getString().c_str(),b,aaCounts.get(b,y));
+      if ( actr[b]>0.0 && modelY.getProb(y)>0.0 && dTot>0.0 ) {
+        double expect = actr[b] * modelY.getProb(y);
+        chisqr += pow ( aaCounts.get(b,y)-expect, 2.0 ) / expect;
+      }
+    }
+  }
+  fprintf(stderr,"\n");
+  // Report...
+  if(OUTPUT_NOISY) {
+    fprintf(stderr,"  tot=%08d separator=%f",(int)dTot,Tree<ContDecisNode<Y,P> >::getWt());
+    for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
+    fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
+    fprintf(stderr," chisqr=%g\n",chisqr);
+  }
+  // If separation is significant to chisqr limit...
+  if ( chisqr > DTREE_CHISQR_LIMIT ) {
+    // Recursively call train at each child...
+    setRight().train ( alxy[1], DTREE_CHISQR_LIMIT );  ////node*2LL+1LL);
+    setLeft().train  ( alxy[0], DTREE_CHISQR_LIMIT );  ////node*2LL);
+  }
+  // If separation is not significant...
+  else {
+    // Add ratio as leaf (uniform over Y when there was no data)...
+    Y y;
+    for ( bool by=y.setFirst(); by; by=y.setNext() )
+      ContDecisNode<Y,P>::setProb(y) = (dTot>0.0) ? modelY.getProb(y) : 1.0/Y::getDomain().getSize();
+  }
+}
+////////////////////////////////////////////////////////////////////////////////
+// ContDTree3DModel that collects (X2,Y) training pairs per X1 value and trains
+// one tree for each X1.
+template<class Y, class X1, class X2, class P>
+class TrainableContDTree3DModel : public ContDTree3DModel<Y,X1,X2,P> {
+ private:
+  // Training pairs, keyed by X1...
+  map<X1,List<Joint2DRV<X2,Y> > > mqlxy;
+ public:
+  // Construction...
+  TrainableContDTree3DModel()               { }
+  TrainableContDTree3DModel(const char* ps) : ContDTree3DModel<Y,X1,X2,P>(ps) { }
+  // Tree for x1, downcast to the trainable type...
+  TrainableContDTree2DModel<Y,X2,P>& setTree(const X1& x1) {
+    return static_cast<TrainableContDTree2DModel<Y,X2,P>&>(ContDTree3DModel<Y,X1,X2,P>::setTree(x1));
+  }
+  // Input: a four-field line adds (X2(vs[2]),Y(vs[3])) to the list for X1(vs[1]);
+  // any other field count is rejected...
+  bool readData ( char* vs[], int numFields ) {
+    if ( 4!=numFields ) return false;
+    mqlxy[X1(vs[1])].add() = Joint2DRV<X2,Y> ( X2(vs[2]), Y(vs[3]) );
+    return true;
+  }
+  // Trains the tree of every X1 value on the data collected for it...
+  void train ( const double DTREE_CHISQR_LIMIT ) {
+    int ctr = 0;
+    X1 x1;
+    bool bMore = x1.setFirst();
+    while ( bMore ) {
+      // The counter advances only when the banner is printed...
+      if(OUTPUT_NOISY)
+        fprintf(stderr,"***** x1:%s (number %d) *****\n",x1.getString().c_str(),ctr++);
+      setTree(x1).train ( mqlxy[x1], DTREE_CHISQR_LIMIT );
+      bMore = x1.setNext();
+    }
+  }
+};
+#endif // _NL_DTREE_CONTIN__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h ADDED Viewed

	@@ -0,0 +1,242 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+//basically a SafeArray2D with operators defined
+template<class T>
+class Matrix : public SafeArray2D<Id<int>,Id<int>,T> {
+  //public:
+  //int xSize;
+  //int ySize;
+ public:
+  /* Constructor / destructor methods...
+   * The default, (x,y) and (x,y,t) constructors forward their arguments to the
+   * matching SafeArray2D<Id<int>,Id<int>,T> constructor.  The copy constructor
+   * sizes the base from a and then copies every element with set/get. */
+  //~Matrix( )                           { delete[] at; }
+  Matrix ( )                         : SafeArray2D<Id<int>,Id<int>,T>( )     { }//{ xSize=0; ySize=0;  }
+  Matrix (int x, int y)              : SafeArray2D<Id<int>,Id<int>,T>(x,y)   { }//{ xSize=x; ySize=y; }
+  Matrix (int x, int y, const T& t)  : SafeArray2D<Id<int>,Id<int>,T>(x,y,t) { }//{ xSize=x; ySize=y; }
+  Matrix (const Matrix& a)           : SafeArray2D<Id<int>,Id<int>,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize;
+                                                                               for(int i=0;i<xSize();i++) for(int j=0;j<ySize();j++) this->set(i,j)=a.get(i,j); }
+  /* Specification methods...
+   * init and reset replace the whole matrix by assigning a freshly constructed
+   * Matrix to *this: x-by-y filled with T(), x-by-y filled with t, or
+   * default-constructed, respectively. */
+  //Matrix& operator= ( const Matrix<T>& sat )
+  //  { xSize=sat.xSize; ySize=sat.ySize; //at=new T[xSize*ySize];
+  //    for(int i=0;i<xSize;i++) for(int j=0;j<ySize;j++) set(i,j)=sat.at[i]; return *this; }
+  void  init ( int x,int y )                   { (*this)=Matrix<T>(x,y,T()); }//xSize=x; ySize=y; }
+  void  init ( int x,int y,const T& t )        { (*this)=Matrix<T>(x,y,t); }//xSize=x; ySize=y; }
+  void  reset()                                { (*this)=Matrix<T>( ); }//xSize=0; ySize=0; }
+  // Inherited methods (set/get are used as inherited; the size accessors below just forward)
+  //T&    set  ( const X1& x,const X2& y);
+  //const T& get (const X1& x,const X2& y) const;
+  int xSize( ) const { return this->getxSize(); }  // first dimension, as reported by getxSize()
+  int ySize( ) const { return this->getySize(); }  // second dimension, as reported by getySize()
+  // Math...
+  friend Matrix<T> operator* ( const Matrix<T>& a, const Matrix<T>& b ) {
+    if (a.ySize()!=b.xSize()) {
+      cerr<<"ERROR: matrix multiplication requires matching inner indices; "<<a.xSize()<<"x"<<a.ySize()<<" "<<b.xSize()<<"x"<<b.ySize()<<endl;
+      #ifndef NDEBUG
+      cerr<<" a= "<<a<<"\n\n b= "<<b<<endl;
+      #endif
+      return Matrix<T>();
+    }
+    Matrix mOut(a.xSize(),b.ySize(),T());
+    for (int i=0; i<a.xSize(); i++ ){
+      for (int k=0; k<a.ySize(); k++ ) {
+	for (int j=0; j<b.ySize(); j++ ) {
+	  mOut.set(i,j) += a.get(Id<int>(i),Id<int>(k))*b.get(Id<int>(k),Id<int>(j));
+	}
+      }
+    }
+    //cerr<<" a= "<<a<<"\n b= "<<b<<"\n c= "<<mOut<<endl<<endl;
+    return mOut;
+  }
+  // Point-by-point product of two equally sized matrices.  On a size mismatch an
+  // error is reported on cerr and a default-constructed matrix is returned.
+  friend Matrix<T> operator& ( const Matrix<T>& a, const Matrix<T>& b ) {
+    const bool bSameShape = ( a.xSize()==b.xSize() && a.ySize()==b.ySize() );
+    if ( !bSameShape ) {
+      cerr<<"ERROR: pt-by-pt multiplication requires matching indices; "<<a.xSize()<<"x"<<a.ySize()<<" "<<b.xSize()<<"x"<<b.ySize()<<endl;
+      #ifndef NDEBUG
+      cerr<<" a= "<<a<<"\n\n b= "<<b<<endl;
+      #endif
+      return Matrix<T>();
+    }
+    // Each product is added onto a cell that starts out as T()...
+    Matrix mProduct ( a.xSize(), a.ySize(), T() );
+    for ( int r=0; r<a.xSize(); r++ )
+      for ( int c=0; c<b.ySize(); c++ )
+        mProduct.set(r,c) += a.get(Id<int>(r),Id<int>(c)) * b.get(Id<int>(r),Id<int>(c));
+    return mProduct;
+  }
+  friend Matrix<T> operator+ ( const Matrix<T>& a, const Matrix<T>& b ) {
+    if (a.xSize()!=b.xSize() || a.ySize()!=b.ySize()) {
+      cerr<<"ERROR: matrix addition requires matching dimensions"<<endl;
+      return Matrix<T>();
+    }
+    Matrix mOut(a.xSize(),b.ySize(),T());
+    for (int i=0; i<a.xSize(); i++ ){
+      for (int j=0; j<a.ySize(); j++ ) {
+	mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))+b.get(Id<int>(i),Id<int>(j));
+      }
+    }
+    return mOut;
+  }
+  // Element-wise difference of two equally sized matrices.  On a size mismatch an
+  // error is reported on cerr and a default-constructed matrix is returned.
+  friend Matrix<T> operator- ( const Matrix<T>& a, const Matrix<T>& b ) {
+    const bool bSameShape = ( a.xSize()==b.xSize() && a.ySize()==b.ySize() );
+    if ( !bSameShape ) {
+      cerr<<"ERROR: matrix subtraction requires matching dimensions"<<endl;
+      return Matrix<T>();
+    }
+    Matrix mDiff ( a.xSize(), b.ySize(), T() );
+    for ( int r=0; r<a.xSize(); r++ )
+      for ( int c=0; c<a.ySize(); c++ )
+        mDiff.set(r,c) = a.get(Id<int>(r),Id<int>(c)) - b.get(Id<int>(r),Id<int>(c));
+    return mDiff;
+  }
+  // Every element of a multiplied by the scalar t...
+  friend Matrix<T> operator* ( const Matrix<T>& a, const T& t ) {
+    const int numRows = a.xSize();
+    const int numCols = a.ySize();
+    Matrix mScaled ( numRows, numCols );
+    for ( int r=0; r<numRows; r++ )
+      for ( int c=0; c<numCols; c++ )
+        mScaled.set(r,c) = a.get(Id<int>(r),Id<int>(c)) * t;
+    return mScaled;
+  }
+  // The scalar t added to every element of a...
+  friend Matrix<T> operator+ ( const Matrix<T>& a, const T& t ) {
+    const int numRows = a.xSize();
+    const int numCols = a.ySize();
+    Matrix mShifted ( numRows, numCols );
+    for ( int r=0; r<numRows; r++ )
+      for ( int c=0; c<numCols; c++ )
+        mShifted.set(r,c) = a.get(Id<int>(r),Id<int>(c)) + t;
+    return mShifted;
+  }
+  // The scalar t subtracted from every element of a...
+  friend Matrix<T> operator- ( const Matrix<T>& a, const T& t ) {
+    const int numRows = a.xSize();
+    const int numCols = a.ySize();
+    Matrix mShifted ( numRows, numCols );
+    for ( int r=0; r<numRows; r++ )
+      for ( int c=0; c<numCols; c++ )
+        mShifted.set(r,c) = a.get(Id<int>(r),Id<int>(c)) - t;
+    return mShifted;
+  }
+  // Largest element of the matrix / vector, starting the search from T().
+  // No absolute value is taken, so the result is never below T()...
+  T infnorm ( ) const {
+    T tMax = T();
+    for ( int r=0; r<xSize(); r++ )
+      for ( int c=0; c<ySize(); c++ )
+        if ( this->get(Id<int>(r),Id<int>(c)) > tMax )
+          tMax = this->get(Id<int>(r),Id<int>(c));
+    return tMax;
+  }
+  /*
+  // Argmax of matrix / vector...  //NOT WORKING
+  pair<int,int> argmax ( ) const {
+    T tOut = T();
+    pair<int,int> ij();
+    for (int i=0; i<xSize(); i++ ){
+      for (int j=0; j<ySize(); j++ ) {
+	if ( this->get(Id<int>(i),Id<int>(j))>tOut ) {
+	  tOut = this->get(Id<int>(i),Id<int>(j));
+	  ij = make_pair(i,j);
+	}
+      }
+    }
+    return ij; //pair<int,int>( ij.getIndex(), ij.getIndex() );
+  }
+  */
+  // Diagonal matrix of vector...
+  // Builds an n x n matrix (n = a.xSize()) initialised with T() whose
+  // diagonal receives the entries of the column vector a.  An assertion
+  // fires if a has any column other than column 0.
+  friend Matrix<T> diag ( const Matrix<T>& a ) {
+    Matrix mDiagonal(a.xSize(),a.xSize(),T()); // output is n x n
+    for ( int iRow=0; iRow<a.xSize(); ++iRow ) {
+      const Id<int> idRow(iRow);
+      for ( int iCol=0; iCol<a.ySize(); ++iCol ) {
+        assert(iCol==0); // must be vector, n x 1
+        mDiagonal.set(idRow,idRow) += a.get(idRow,Id<int>(iCol));
+      }
+    }
+    return mDiagonal;
+  }
+  // Ordering method (treat as bit string)...
+  // Strict ordering: matrices are compared first by xSize(), then by ySize(),
+  // then entry by entry in row-major order.  Comparing the two dimensions
+  // lexicographically (rather than with ||) keeps the relation asymmetric:
+  // with the || form a 1x3 and a 2x2 matrix were each "less than" the other,
+  // which breaks ordered containers and sorting.
+  bool operator< ( const Matrix<T>& mt ) const {
+    if ( xSize()!=mt.xSize() ) return xSize()<mt.xSize();
+    if ( ySize()!=mt.ySize() ) return ySize()<mt.ySize();
+    for ( int i=0; i<xSize(); i++ ) {
+      for ( int j=0; j<ySize(); j++ ) {
+        const Id<int> idRow(i);
+        const Id<int> idCol(j);
+        // The first differing entry decides the comparison.
+        if ( this->get(idRow,idCol) < mt.get(idRow,idCol) ) return true;
+        else if ( this->get(idRow,idCol) > mt.get(idRow,idCol) ) return false;
+      }
+    }
+    // Same shape and same entries: not less than.
+    return false;
+  }
+  // Two matrices are equal when both dimensions match and no pair of
+  // corresponding entries compares unequal with !=.
+  bool operator== ( const Matrix<T>& a ) const {
+    const bool bSameShape = ( xSize()==a.xSize() && ySize()==a.ySize() );
+    if ( !bSameShape ) return false;
+    for ( int iRow=0; iRow<a.xSize(); ++iRow ) {
+      for ( int iCol=0; iCol<a.ySize(); ++iCol ) {
+        const Id<int> idRow(iRow);
+        const Id<int> idCol(iCol);
+        if ( this->get(idRow,idCol)!=a.get(idRow,idCol) ) return false;
+      }
+    }
+    return true;
+  }
+  // Input/output methods...
+  // Writes the matrix one row per line: a leading newline, each row indented
+  // by four spaces with comma-separated entries, and a newline after the
+  // last row.
+  friend ostream& operator<< ( ostream& os, const Matrix<T>& a ) {
+    os<<"\n    ";
+    for ( int iRow=0; iRow<a.xSize(); ++iRow ) {
+      for ( int iCol=0; iCol<a.ySize(); ++iCol ) {
+        if ( iCol!=0 ) os<<",";
+        os<<a.get(Id<int>(iRow),Id<int>(iCol));
+      }
+      // Every row but the last is followed by the indent for the next row.
+      if ( iRow==a.xSize()-1 ) os<<"\n";
+      else                     os<<"\n    ";
+    }
+    return os;
+  }
+  // Appends the matrix to a String: a leading newline and four spaces, then
+  // each row as comma-separated entries terminated by ';'.
+  friend String& operator<< ( String& str, const Matrix<T>& a ) {
+    str<<"\n    ";
+    for ( int iRow=0; iRow<a.xSize(); ++iRow ) {
+      for ( int iCol=0; iCol<a.ySize(); ++iCol ) {
+        const char* psSep = (iCol==0) ? "" : ",";
+        str<<psSep<<a.get(Id<int>(iRow),Id<int>(iCol));
+      }
+      str<<";";
+    }
+    return str;
+  }
+  string getString( ) const;
+};
+// Serialises the matrix into a std::string: entries of a row are separated
+// by ',' and every row is terminated by ';'.
+// NOTE(review): each entry is appended with string::operator+=, so for an
+// arithmetic T this appends a single character rather than a formatted
+// number -- confirm this is what callers expect.
+template <class T>
+string Matrix<T>::getString() const {
+  string sOut;
+  for ( int iRow=0; iRow<xSize(); ++iRow ) {
+    for ( int iCol=0; iCol<ySize(); ++iCol ) {
+      if ( iCol!=0 ) sOut += ",";
+      sOut += this->get(Id<int>(iRow),Id<int>(iCol));
+    }
+    sOut += ";";
+  }
+  return sOut;
+}

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-gauss.h ADDED Viewed

	@@ -0,0 +1,287 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_GAUSS__
+#define _NL_GAUSS__
+#include <vector>
+#include <string>
+#include <math.h>
+#include "nl-cpt.h"
+#include "nl-prob.h"
+#include "nl-list.h"
+using namespace std;
+// Thresholds for changes in means and variances.  No active code in this
+// header reads them; the trailing comments record previously tried values.
+static const PDFVal MEAN_THRESHOLD = 0.01; //0.0001; //0.001
+static const PDFVal VARIANCE_THRESHOLD = 0.01; //0.0001; //0
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Diagonal Multivariate Gaussian Model
+//
+////////////////////////////////////////////////////////////////////////////////
+// Gaussian density with a diagonal covariance over observations of type Y.
+// Means and variances are stored per feature index (0 .. Y::getSize()-1).
+// precomputeVarianceTerms() must run after the variances are set and before
+// getProb() is called (getProb() asserts on this).
+template <class Y>
+class DiagGauss1DModel : public Generic1DModel<Y,PDFVal> {
+ private:
+  // Member variables...
+  string                     sId;
+  bool                       bModeled;                 // set by precomputeVarianceTerms()
+  PDFVal                     prInvPowSqrt2PI;          // 1 / sqrt(2*pi)^numFeats
+  SimpleHash<Id<int>,PDFVal> aMeans;                   // mean per feature
+  SimpleHash<Id<int>,PDFVal> aVariances;               // variance per feature
+  PDFVal                     prInvRootNormVariances;   // product over features of 1/sqrt(variance)
+  PDFVal                     prProduct;                // prInvPowSqrt2PI * prInvRootNormVariances
+  SimpleHash<Id<int>,PDFVal> algprNegHalfInvVariances; // -1/(2*variance) per feature
+ public:
+  // Constructor / destructor methods...
+  // The scalar terms are zero-initialised so they never hold indeterminate
+  // values before precomputeVarianceTerms() fills them in.
+  DiagGauss1DModel ( )                 :         bModeled(false), prInvPowSqrt2PI(0.0), prInvRootNormVariances(0.0), prProduct(0.0) { }
+  DiagGauss1DModel ( const string& s ) : sId(s), bModeled(false), prInvPowSqrt2PI(0.0), prInvRootNormVariances(0.0), prProduct(0.0) { }
+  // Specification methods...
+  void precomputeVarianceTerms ( ) ;
+  PDFVal& setMean           ( int i ) { return aMeans.set(i);                   }
+  PDFVal& setVariance       ( int i ) { return aVariances.set(i);               }
+  PDFVal& setInvRootNormVar ( )       { return prInvRootNormVariances;          }
+  PDFVal& setNegHalfInvVar  ( int i ) { return algprNegHalfInvVariances.set(i); }
+  // Extraction methods...
+  PDFVal  getMean           ( int i )    const { return aMeans.get(i);                   }
+  PDFVal  getVariance       ( int i )    const { return aVariances.get(i);               }
+  PDFVal  getInvRootNormVar ( )          const { return prInvRootNormVariances;          }
+  PDFVal  getNegHalfInvVar  ( int i )    const { return algprNegHalfInvVariances.get(i); }
+  int     getNumFeats       ( )          const { return Y::getSize(); }
+  PDFVal  getProb           ( const Y& ) const ;
+  // Input / output methods...
+  bool    readFields  ( char*[], int ) ;
+  void    writeFields ( FILE*, const string& ) const ;
+};
+////////////////////////////////////////
+// Derives the cached normalisation terms from the current variances and
+// marks the model as ready for getProb().
+template <class Y>
+inline void DiagGauss1DModel<Y>::precomputeVarianceTerms ( ) {
+  // Product over all features of 1/sqrt(variance)...
+  setInvRootNormVar() = 1.0;
+  for ( int iFeat=0; iFeat<getNumFeats(); ++iFeat ) {
+    setInvRootNormVar() *= 1.0/sqrt(getVariance(iFeat));
+  }
+  // Per-feature exponent coefficient -1/(2*variance)...
+  for ( int iFeat=0; iFeat<getNumFeats(); ++iFeat ) {
+    setNegHalfInvVar(iFeat) = -1.0/(2.0*getVariance(iFeat));
+  }
+  // Constant factor 1/sqrt(2*pi)^numFeats and the combined leading factor...
+  prInvPowSqrt2PI = 1.0/pow(sqrt(2.0*M_PI),getNumFeats());
+  prProduct       = prInvPowSqrt2PI * getInvRootNormVar();
+  bModeled = true;
+}
+////////////////////////////////////////
+// Evaluates the density at y: the precomputed leading factor times the
+// exponential of the summed per-feature terms -(y_i - mean_i)^2/(2*var_i).
+// Asserts that precomputeVarianceTerms() has already been called.
+template <class Y>
+inline PDFVal DiagGauss1DModel<Y>::getProb ( const Y& y ) const {
+  assert(bModeled);
+  PDFVal sumExponent = 0.0;
+  for ( int iFeat=0; iFeat<getNumFeats(); ++iFeat ) {
+    sumExponent += getNegHalfInvVar(iFeat) * pow(y.get(iFeat)-getMean(iFeat),2.0);
+  }
+  const PDFVal prDensity = prProduct * exp(sumExponent);
+  return prDensity;
+}
+////////////////////////////////////////
+// Parses one model line that has already been split into fields.
+//   as[1]  "m" for means or "v" for variances
+//   as[2]  '_'-separated values, one per feature (modified in place by
+//          strtok_r)
+// Returns true when the line was recognised and a value was read for every
+// feature.  Returns false when there are too few fields, as[1] is neither
+// "m" nor "v", or as[2] holds fewer than getNumFeats() values; in the last
+// case the values read before the shortfall have already been stored.
+template <class Y>
+bool DiagGauss1DModel<Y>::readFields ( char* as[], int numFields ) {
+  // Check the field count first so as[1] and as[2] are never read when absent.
+  if ( numFields<=2 ) return false;
+  const bool bMeans     = ( 0==strcmp(as[1],"m") );
+  const bool bVariances = ( 0==strcmp(as[1],"v") );
+  if ( !bMeans && !bVariances ) return false;
+  char* psT = NULL;
+  for ( int i=0; i<getNumFeats(); i++ ) {
+    const char* psTok = strtok_r((0==i)?as[2]:NULL,"_",&psT);
+    // strtok_r yields NULL when the values run out; atof(NULL) is undefined.
+    if ( NULL==psTok ) return false;
+    if ( bMeans ) setMean(i)     = atof(psTok);
+    else          setVariance(i) = atof(psTok);
+  }
+  return true;
+}
+////////////////////////////////////////
+// Writes two lines to pf, each starting with sPref: an "m" line with the
+// means and a "v" line with the variances, values joined by '_'.
+template <class Y>
+void DiagGauss1DModel<Y>::writeFields ( FILE* pf, const string& sPref ) const {
+  const int numFeats = getNumFeats();
+  // Means...
+  fprintf(pf,"%s m = ",sPref.c_str());
+  for ( int iFeat=0; iFeat<numFeats; ++iFeat ) {
+    const char* psSep = (0==iFeat) ? "" : "_";
+    fprintf(pf,"%s%f",psSep,getMean(iFeat));
+  }
+  fprintf(pf,"\n");
+  // Variances...
+  fprintf(pf,"%s v = ",sPref.c_str());
+  for ( int iFeat=0; iFeat<numFeats; ++iFeat ) {
+    const char* psSep = (0==iFeat) ? "" : "_";
+    fprintf(pf,"%s%f",psSep,getVariance(iFeat));
+  }
+  fprintf(pf,"\n");
+}
+////////////////////////////////////////////////////////////////////////////////
+/*
+template <class Y,class X>
+class DiagGauss2DModel : public Generic2DModel<Y,X,PDFVal> {
+ private:
+  // Member variables...
+  string                             sId;
+  SimpleHash<X,DiagGauss1DModel<Y> > mMY_giv_X;
+ public:
+  // Constructor / destructor methods...
+  DiagGauss2DModel ( const string& s ) : sId(s) { }
+  // Extraction methods...
+  Prob getProb ( const Y& y, const X& x ) const { return mMY_giv_X.get(x).getProb(y); }
+  // Input / output methods...
+  bool readFields  ( char* as[], int numFields ) {
+    ////if ( as[0]!=sId ) return false; // HAVE TO CHECK IN CALLIN FN NOW
+    if      ( 0==strcmp(as[1],"m") && numFields>3 )
+      for ( int i=0; i<numFields-3; i++ ) mMY_giv_X.set(X(as[2])).setMean(i)     = atof(as[i+4]) ;
+    else if ( 0==strcmp(as[1],"v") && numFields>3 )
+      for ( int i=0; i<numFields-3; i++ ) mMY_giv_X.set(X(as[2])).setVariance(i) = atof(as[i+4]) ;
+    else return false;
+    return true;
+  }
+  void writeFields ( FILE* pf, const string& sPref ) const {
+    X x;
+    for(bool b=x.setFirst(); b; b=x.setNext())
+      { fprintf(pf,"%s m ",sPref.c_str()); x.write(pf); fprintf(pf," =");
+        for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X.getProb(x).getMean(i));
+        fprintf ( pf, "\n" ) ; }
+    for(bool b=x.setFirst(); b; b=x.setNext())
+      { fprintf(pf,"%s v ",sPref.c_str()); x.write(pf); fprintf(pf," =");
+        for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X.getProb(x).getVariance(i));
+        fprintf ( pf, "\n" ) ; }
+  }
+};
+////////////////////////////////////////////////////////////////////////////////
+template <class Y,class X1,class X2>
+class DiagGauss3DModel : public Generic3DModel<Y,X1,X2,PDFVal> {
+ private:
+  // Member variables...
+  string                                            sId;
+  SimpleHash<Joint2DRV<X1,X2>,DiagGauss1DModel<Y> > mMY_giv_X1_X2;
+ public:
+  // Constructor / destructor methods...
+  DiagGauss3DModel ( const string& s ) : sId(s) { }
+  // Extraction methods...
+  Prob getProb ( const Y& y, const X1& x1, const X2& x2 ) const { return mMY_giv_X1_X2.get(x1,x2).getProb(y); }
+  // Input / output methods...
+  bool readFields  ( char* as[], int numFields ) {
+    if ( as[0]!=sId ) return false;
+    if      ( 0==strcmp(as[1],"m") && numFields>4 )
+      for ( int i=0; i<numFields-4; i++ ) mMY_giv_X1_X2.set(Joint2DRV<X1,X2>(X1(as[2]),X2(as[2]))).setMean(i)     = atof(as[i+4]) ;
+    else if ( 0==strcmp(as[1],"v") && numFields>4 )
+      for ( int i=0; i<numFields-4; i++ ) mMY_giv_X1_X2.set(Joint2DRV<X1,X2>(X1(as[2]),X2(as[2]))).setVariance(i) = atof(as[i+4]) ;
+    else return false;
+    return true;
+  }
+  void writeFields ( FILE* pf, string& sPref ) const {
+    X1 x1; X2 x2;
+    for(bool b1=x1.setFirst(); b1; b1=x1.setNext()) {
+      for(bool b2=x2.setFirst(); b2; b2=x2.setNext())
+        { fprintf(pf,"%s m ",sPref.c_str()); x1.write(pf); fprintf(pf," "); x2.write(pf); fprintf(pf," =");
+          for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X1_X2.get(Joint2DRV<X1,X2>(x1,x2)).getMean(i));
+          fprintf(pf,"\n"); }
+      for(bool b2=x2.setFirst(); b2; b2=x2.setNext())
+        { fprintf(pf,"%s v ",sPref.c_str()); x1.write(pf); fprintf(pf," "); x2.write(pf); fprintf(pf," =");
+          for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X1_X2.get(Joint2DRV<X1,X2>(x1,x2)).getVariance(i));
+          fprintf(pf,"\n"); }
+    }
+  }
+};
+*/
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Trainable Diagonal Multivariate Gaussian Model
+//
+////////////////////////////////////////////////////////////////////////////////
+// DiagGauss1DModel whose means and variances can be estimated from a list
+// of weighted observations via setFields().
+template <class Y>
+class TrainableDiagGauss1DModel : public DiagGauss1DModel<Y> {
+ public:
+  TrainableDiagGauss1DModel ( )                 : DiagGauss1DModel<Y>() { }
+  TrainableDiagGauss1DModel ( const string& s ) : DiagGauss1DModel<Y>(s) { }
+  // input / output methods...
+  // Re-estimates every mean and variance from (observation pointer, weight) pairs.
+  void setFields ( const List<pair<const Y*,Prob> >& ) ;
+};
+////////////////////////////////////////
+// Re-estimates the model from weighted observations.
+//   lyp  list of (pointer to observation, weight) pairs
+// For every feature the mean becomes the weighted sum of the observed values
+// and the variance the weighted sum of squared deviations from that mean.
+// Variances below 1.0 are raised to 1.0, and the cached variance terms are
+// recomputed at the end.
+// NOTE(review): the weights are used as given, with no normalisation here --
+// presumably they already sum to one; confirm against callers.
+template <class Y>
+void TrainableDiagGauss1DModel<Y>::setFields ( const List<pair<const Y*,Prob> >& lyp ) {
+  typedef ListedObject<pair<const Y*,Prob> > Item;
+  // For each dimension...
+  for ( int i=0; i<this->getNumFeats(); i++ ) {
+    // Calc means...
+    // (A convergence check against MEAN_THRESHOLD used to sit here; it was
+    // already disabled, so the previous mean is no longer captured.)
+    this->setMean(i) = 0.0;
+    for ( const Item* pyp=lyp.getFirst(); pyp; pyp=lyp.getNext(pyp) ) {
+      const Y&    y      = *pyp->first;  // data value
+      const Prob& prEmpY = pyp->second;  // empirical prob
+      this->setMean(i) += prEmpY * y.get(i);
+    }
+    // Calc variances...
+    // (Likewise for the disabled VARIANCE_THRESHOLD check.)
+    const PDFVal mean = this->getMean(i);  // fixed for the whole pass below
+    this->setVariance(i) = 0.0;
+    for ( const Item* pyp=lyp.getFirst(); pyp; pyp=lyp.getNext(pyp) ) {
+      const Y&    y      = *pyp->first;  // data value
+      const Prob& prEmpY = pyp->second;  // empirical prob
+      this->setVariance(i) += prEmpY * pow(mean-y.get(i),2) ;
+    }
+    // Avoid div by zero...
+    if ( this->getVariance(i) < 1.0 ) this->setVariance(i) = 1.0;
+  }
+  this->precomputeVarianceTerms();
+}
+#endif /*_NL_GAUSS__*/

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hash.h ADDED Viewed

	@@ -0,0 +1,105 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef __NL_HASH_H_
+#define __NL_HASH_H_
+#include <cassert>
+//#include <tr1/unordered_map>
+#include <ext/hash_map>
+using namespace __gnu_cxx;
+///////////////////////////////////////////////////////////////////////////////
+// Hash functor that forwards to the key's own getHashKey() method.
+template<class T>
+class SimpleHashFn {
+ public:
+  size_t operator() ( const T& t ) const { return t.getHashKey(); }
+};
+// Key-equality functor that forwards to the key type's operator==.
+template<class T>
+class SimpleHashEqual {
+ public:
+  bool operator() ( const T& t1, const T& t2 ) const { return (t1 == t2); }
+};
+// hash_map from X to Y with a non-inserting const lookup.
+//   set(x)      returns a mutable reference, creating the entry if needed
+//   get(x)      returns the stored value, or a shared default-constructed Y
+//               when x is absent; it never inserts
+//   contains(x) tells whether x has an entry
+// X must provide getHashKey() and operator== (see SimpleHashFn and
+// SimpleHashEqual).
+template<class X, class Y>
+class SimpleHash : public hash_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > /*public tr1::unordered_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> >*/ {
+ private:
+   typedef hash_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > OrigHash;
+//  typedef tr1::unordered_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > OrigHash;
+  // Value handed out by get() for keys that are not present.
+  static const Y yDummy;
+ public:
+  // Constructor / destructor methods...
+  SimpleHash ( )       : OrigHash()  { }
+  SimpleHash ( int i ) : OrigHash(i) { }
+  SimpleHash (const SimpleHash& s) : OrigHash(s) { }
+  // Specification methods...
+  Y&       set ( const X& x )       { return OrigHash::operator[](x); }
+  // Extraction methods...
+  // One find() per lookup; the iterator is reused for both the test and the access.
+  const Y& get      ( const X& x ) const {
+    const typename OrigHash::const_iterator it = OrigHash::find(x);
+    return (OrigHash::end()!=it) ? it->second : yDummy;
+  }
+  bool     contains ( const X& x ) const { return (OrigHash::end()!=OrigHash::find(x)); }
+  // Writes the entries as comma-separated key:value pairs in iteration order.
+  friend ostream& operator<< ( ostream& os, const SimpleHash<X,Y>& h ) {
+    for ( typename SimpleHash<X,Y>::const_iterator it=h.begin(); it!=h.end(); ++it )
+      os<<((it==h.begin())?"":",")<<it->first<<":"<<it->second;
+    return os;
+  }
+};
+template<class X, class Y> const Y SimpleHash<X,Y>::yDummy = Y();
+//template<class X, class Y>  Y SimpleHash<X,Y>::yNonconstDummy; // = Y();
+/*
+template<class X, class Y>
+class SimpleMultiHash : public tr1::unordered_multimap<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > {
+ private:
+  typedef tr1::unordered_multimap<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > OrigHash;
+ public:
+  typedef pair<typename OrigHash::const_iterator,typename OrigHash::const_iterator> const_iterator_pair;
+  // Constructor / destructor methods...
+  SimpleMultiHash ( )       : OrigHash()  { }
+  SimpleMultiHash ( int i ) : OrigHash(i) { }
+  // Specification methods...
+  Y& add ( const X& x )  { return insert(typename OrigHash::value_type(x,Y()))->second; }
+  // Extraction methods...
+  bool contains ( const X& x )             const { return (OrigHash::end()!=OrigHash::find(x)); }
+  bool contains ( const X& x, const Y& y ) const {
+    if (OrigHash::end()==OrigHash::find(x)) return false;
+    for ( const_iterator_pair ii=OrigHash::equal_range(x); ii.first!=ii.second; ii.first++ )
+      if ( y == ii.first->second ) return true;
+    return false;
+  }
+};
+*/
+#endif // __NL_HASH_H_

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-heap.h ADDED Viewed

	@@ -0,0 +1,181 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_MINHEAP_
+#define _NL_MINHEAP_
+#include "nl-safeids.h"
+////////////////////////////////////////////////////////////////////////////////
+// Binary heap ordered by the compile-time predicate outrank(a,b), which is
+// true when a belongs nearer the top than b.  Positions passed to set(),
+// get(), fixIncRank() and fixDecRank() are 1-based; position k is stored at
+// at[k-1].
+template <class R, bool outrank(const R&, const R&)>
+class Heap {
+ private:
+  Array<R> at;            // backing storage
+  int      iNextToFill;   // number of elements currently in the heap
+  //SafeArray1D<Id<int>,R> at;
+  // Private specification methods...
+  int      heapify ( unsigned int ) ;
+ public:
+  // Constructor / destructor methods...
+  // Declared with the plain class name: naming a constructor with a
+  // template-id (Heap<R,outrank>) is rejected from C++20 onwards.
+  Heap ( )                   : at(10),  iNextToFill(0) { }
+  Heap ( int i )             : at(i),   iNextToFill(0) { }
+  Heap ( int i, const R& r ) : at(i,r), iNextToFill(0) { }
+  // Specification methods...
+  void     init       ( int i )                { iNextToFill=0; at.init(i); }
+  void     clear      ( )                      { iNextToFill=0; }
+  unsigned int fixIncRank ( unsigned int i );
+  unsigned int fixDecRank ( unsigned int i );
+  R&       set        ( unsigned int i )       { return at.set(i-1); }
+  // Places r in the first free position and sifts it up.
+  void     enqueue    ( const R& r )           { set(iNextToFill+1)=r; fixIncRank(iNextToFill+1); iNextToFill++; }
+  // Removes and returns the top: the last element moves to the top, its old
+  // slot is reset to R(), and the new top is sifted down.  No check is made
+  // for an empty heap.
+  R        dequeueTop ( )                      { R r=get(1); iNextToFill--; set(1)=get(iNextToFill+1); set(iNextToFill+1)=R(); fixDecRank(1); return r; }
+  ////R&       set        ( const Id<int>& i )       { return at.set(i); }
+  R&       setTop     ( )                      { return at.set(1-1); }
+  // Extraction methods...
+  int      getSize    ( ) const                { return iNextToFill; }
+  const R& getTop     ( ) const                { return at.get(1-1); }
+  const R& get        ( unsigned int i ) const { return at.get(i-1); }
+  ////const R& get        ( const Id<int>& i ) const { return at.get(i); }
+  // Input / output methods...
+  // Prints the stored elements in storage order, one per line.
+  friend ostream& operator<< ( ostream& os, const Heap<R,outrank>& h ) { for(int i=0;i<h.iNextToFill;i++) os<<h.at.get(i)<<"\n"; return os; }
+};
+////////////////////////////////////////////////////////////////////////////////
+// Sifts the element at 1-based position ind down until neither child
+// outranks it, and returns the position where it comes to rest.
+template <class R, bool outrank(const R&, const R&)>
+int Heap<R,outrank>::heapify ( unsigned int ind ) {
+  unsigned int indCur = ind;
+  for (;;) {
+    // Pick the best of the current node and its (existing) children...
+    const unsigned int indLeft  = indCur*2;
+    const unsigned int indRight = indCur*2+1;
+    unsigned int indBest = indCur;
+    if ( indLeft <= (unsigned int)iNextToFill && outrank(get(indLeft),get(indBest)) )
+      indBest = indLeft;
+    if ( indRight <= (unsigned int)iNextToFill && outrank(get(indRight),get(indBest)) )
+      indBest = indRight;
+    // Heap property holds here: done...
+    if ( indBest == indCur ) return indCur;
+    // Otherwise swap with the better child and continue from there...
+    R rTemp      = get(indCur);
+    set(indCur)  = get(indBest);
+    set(indBest) = rTemp;
+    indCur = indBest;
+  }
+}
+// Sifts the element at 1-based position ind up while it outranks its parent
+// (position ind/2), and returns the position where it comes to rest.
+// The walk stops at the root.  Without that stop, a predicate for which
+// outrank(x,x) is true would continue to position 0, and get(0) reads
+// at.get(0u-1).
+template <class R, bool outrank(const R&, const R&)>
+unsigned int Heap<R,outrank>::fixIncRank ( unsigned int ind ) {     //const R& rec ) {
+  while ( ind > 1 ) {
+    const unsigned int indParent = ind/2;
+    // Parent already ranks at least as high: heap property restored...
+    if ( !outrank(get(ind),get(indParent)) ) break;
+    // Swap heap elements...
+    R rTemp        = get(indParent);
+    set(indParent) = get(ind);
+    set(ind)       = rTemp;
+    // Continue from the parent...
+    ind = indParent;
+  }
+  return ind;
+}
+// Restores the heap below 1-based position ind by delegating to heapify(),
+// and returns the position heapify() reports.
+template <class R, bool outrank(const R&, const R&)>
+unsigned int Heap<R,outrank>::fixDecRank ( unsigned int ind ) {     //const R& rec ) {
+  const unsigned int indSettled = heapify(ind);
+  return indSettled;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Min-heap over an Array<R>, ordered by R::getScore().  The whole array is
+// treated as the heap (getSize() is the array size).  set(), get(), fixDecr()
+// and fixIncr() take 0-based indices; the minimum is at index 0.
+template <class R>
+class MinHeap {
+ private:
+  Array<R> at;   // backing storage
+  //SafeArray1D<Id<int>,R> at;
+  // Private specification methods...
+  int      minHeapify ( unsigned int ) ;   // takes a 1-based position
+ public:
+  // Constructor / destructor methods...
+  // Declared with the plain class name: naming a constructor with a
+  // template-id (MinHeap<R>) is rejected from C++20 onwards.
+  MinHeap ( )                   : at(10)  { }
+  MinHeap ( int i )             : at(i)   { }
+  MinHeap ( int i, const R& r ) : at(i,r) { }
+  // Specification methods...
+  void     init       ( int i )                  { at.init(i); }
+  void     clear      ( )                        { at.clear(); }
+  int      fixDecr    ( int i );
+  int      fixIncr    ( int i );
+  R&       set        ( unsigned int i )         { return at.set(i); }
+  ////R&       set        ( const Id<int>& i )       { return at.set(i); }
+  R&       setMin     ( )                        { return at.set(1-1); }
+  // Extraction methods...
+  int      getSize    ( ) const                  { return at.getSize(); }
+  const R& getMin     ( ) const                  { return at.get(1-1); }
+  const R& get        ( unsigned int i ) const   { return at.get(i); }
+  ////const R& get        ( const Id<int>& i ) const { return at.get(i); }
+};
+////////////////////////////////////////////////////////////////////////////////
+// Sifts the element at 1-based position ind down until neither child has a
+// smaller getScore(), and returns the 1-based position where it ends up.
+// Position k is stored at at[k-1].
+template <class R>
+int MinHeap<R>::minHeapify ( unsigned int ind ) {
+  unsigned int indCur = ind;
+  for (;;) {
+    // Find min of the current node, left child, right child...
+    const unsigned int indLeft  = indCur*2;
+    const unsigned int indRight = indCur*2+1;
+    unsigned int indMin = indCur;
+    if ( indLeft <= (unsigned int)at.getSize() &&
+         at.get(indLeft-1).getScore() < at.get(indMin-1).getScore() )
+      indMin = indLeft;
+    if ( indRight <= (unsigned int)at.getSize() &&
+         at.get(indRight-1).getScore() < at.get(indMin-1).getScore() )
+      indMin = indRight;
+    // Current node is already the min: done...
+    if ( indMin == indCur ) return indCur;
+    // Otherwise swap with the smaller child and continue from there...
+    R rTemp          = at.get(indCur-1);
+    at.set(indCur-1) = at.get(indMin-1);
+    at.set(indMin-1) = rTemp;
+    indCur = indMin;
+  }
+}
+// Sifts the element at 0-based index i up while its getScore() is smaller
+// than its parent's, and returns the 0-based index where it ends up.
+// The parent of index i is (i-1)/2.  This matches minHeapify(), which places
+// the children of 0-based index p at 2p+1 and 2p+2.  The former (i+1)/2
+// compared index 1 with itself and index 2 with its sibling, so neither
+// could ever reach the root.
+template <class R>
+int MinHeap<R>::fixDecr ( int i ) {     //const R& rec ) {
+  while ( i > 0 ) {
+    const int iParent = (i-1)/2;
+    // Parent is not larger: heap property restored...
+    if ( !( at.get(iParent).getScore() > at.get(i).getScore() ) ) break;
+    // Swap heap elements...
+    R rTemp         = at.get(iParent);
+    at.set(iParent) = at.get(i);
+    at.set(i)       = rTemp;
+    // Continue from the parent...
+    i = iParent;
+  }
+  return i;
+}
+// Sifts the element at 0-based index i down.  minHeapify() works with
+// 1-based positions, so the index is shifted by one on the way in and back
+// on the way out.
+template <class R>
+int MinHeap<R>::fixIncr ( int i ) {     //const R& rec ) {
+  const int indSettled = minHeapify(i+1);
+  return indSettled-1;
+}
+#endif //_NL_MINHEAP_

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h ADDED Viewed

	@@ -0,0 +1,397 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_HMMLOOP_
+#define _NL_HMMLOOP_
+#include <list>
+#include <string>
+#include <boost/thread/thread.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/bind.hpp>
+#include "nl-prob.h"
+#include "nl-safeids.h"
+#include "nl-beam.h"
+typedef int Frame;
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  NullBackDat - default empty back-pointer data; can replace with word or sem relation
+//
+////////////////////////////////////////////////////////////////////////////////
+// Placeholder back-pointer payload: carries no information, writes nothing
+// and stringifies to the empty string.  The constructor taking a Y ignores
+// its argument.
+template <class Y>
+class NullBackDat {
+  static const string sDummy;                        // empty string returned by getString()
+  char dummy_data_member_to_avoid_compile_warning;
+ public:
+  NullBackDat ()           : dummy_data_member_to_avoid_compile_warning(0) { }
+  NullBackDat (const Y& y) : dummy_data_member_to_avoid_compile_warning(0) { }
+  void write  (FILE*) const  { }
+  string getString() const   { return sDummy; }
+  friend ostream& operator<< ( ostream& os, const NullBackDat& nb ) { return os; }
+};
+template <class Y>
+const string NullBackDat<Y>::sDummy = "";
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Index - pointer to source in previous beam heap
+//
+////////////////////////////////////////////////////////////////////////////////
+// Id<int> extended with construction from an int and a pre-increment
+// operator.
+class Index : public Id<int> {
+ public:
+  Index             ( )     { }
+  Index             (int i) {set(i);}
+  // Pre-increment: stores toInt()+1 and returns this object.
+  Index& operator++ ( )     {set(toInt()+1); return *this;}
+};
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  TrellNode - node in viterbi trellis
+//
+////////////////////////////////////////////////////////////////////////////////
+// TrellNode<S,B>: one entry of the trellis. It records the index of its source
+// entry, back-pointer data of type B, a state id of type S, and a LogProb score.
+template <class S, class B>
+class TrellNode {
+ private:
+  // Data members...
+  Index   indSource;     // index of the source entry this node was reached from
+  B       backptrData;   // payload attached to the back pointer
+  S       sId;           // state id
+  LogProb lgprMax;       // score of this node
+ public:
+  // Constructor / destructor methods...
+  // Default: every member is default-constructed.
+  TrellNode ( ) { }
+  // Fills all four members from the arguments (by assignment in the body).
+  TrellNode ( const Index& indS, const S& sI, const B& bDat, LogProb lgpr) {
+    indSource   = indS;
+    sId         = sI;
+    lgprMax     = lgpr;
+    backptrData = bDat;
+  }
+  // Specification methods...
+  // Despite the set* names, only setScore() hands out a mutable reference;
+  // the other three are const and return const references.
+  const Index& setSource ( ) const {
+    return indSource;
+  }
+  const B& setBackData ( ) const {
+    return backptrData;
+  }
+  const S& setId ( ) const {
+    return sId;
+  }
+  LogProb& setScore ( ) {
+    return lgprMax;
+  }
+  // Extraction methods...
+  // Two nodes compare equal when their state ids compare equal; source,
+  // back-pointer data and score are not consulted.
+  bool operator== ( const TrellNode<S,B>& tnsb ) const {
+    return ( sId == tnsb.sId );
+  }
+//  size_t       getHashKey ( ) const { return sId.getHashKey(); }
+  const Index& getSource ( ) const {
+    return indSource;
+  }
+  const B& getBackData ( ) const {
+    return backptrData;
+  }
+  const S& getId ( ) const {
+    return sId;
+  }
+  // getLogProb() and getScore() return the same stored value.
+  LogProb getLogProb ( ) const {
+    return lgprMax;
+  }
+  LogProb getScore ( ) const {
+    return lgprMax;
+  }
+  // Input / output methods...
+  // Prints "source,backdata,id,score" separated by commas.
+  friend ostream& operator<< ( ostream& os, const TrellNode& tn ) {
+    return os << tn.indSource
+              << "," << tn.backptrData
+              << "," << tn.sId
+              << "," << tn.lgprMax;
+  }
+};
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  HMMLoop
+//
+////////////////////////////////////////////////////////////////////////////////
+// HMMLoop<MY,MX,S,B>: beam-limited trellis decoder whose frames are stored in a
+// fixed number of slots (frame t lives in slot t%LOOP_LENGTH, so old frames are
+// overwritten as decoding proceeds).
+//   MY - model over the hidden variable; used here through setIterProb(),
+//        RandVarType and IterVal.
+//   MX - observation model; used here through getProb() and RandVarType.
+//   S  - state id type stored in each trellis node (defaults to MY::RandVarType).
+//   B  - back-pointer payload type (defaults to the empty NullBackDat).
+template <class MY, class MX, class S=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
+class HMMLoop {
+ private:
+  // (source index in previous frame, back-pointer payload) pair carried through the beam.
+  typedef std::pair<Index,B> IB;
+  // Data members...
+  MY modY;
+  MX modX;
+  // Trellis storage, initialised as LOOP_LENGTH frames by BEAM_WIDTH entries.
+  SafeArray2D<Id<Frame>,Id<int>,TrellNode<S,B> > aatnTrellis;
+  // Fixed at construction; being const members they make the class non-assignable.
+  const int BEAM_WIDTH, LOOP_LENGTH;
+  // Absolute number of the most recently written frame (not reduced modulo LOOP_LENGTH).
+  Frame  frameLast;
+  // NOTE(review): not referenced by any code visible in this header.
+  int    iNextNode;
+ public:
+  // Static member variables...
+  // Verbosity flags; all default to false and are not read by the code visible here.
+  static bool OUTPUT_QUIET;
+  static bool OUTPUT_NOISY;
+  static bool OUTPUT_VERYNOISY;
+//  static int  BEAM_WIDTH;
+  // Constructor / destructor methods...
+  // Arguments: argument count, argument vector (model file names from index 1),
+  // beam width, loop length, initial state. The constructor also runs the
+  // whole stdin decoding loop (see its definition).
+  HMMLoop ( int, const char*[], int, int, const S& ) ;
+  // Specification methods...
+//  void init         ( int, int, const S& ) ;
+//  void init         ( int, int, SafeArray1D<Id<int>,pair<S,LogProb> >* );
+  // Advances the trellis by one observation and returns a node from the
+  // oldest retained frame (see the definition for details).
+  const TrellNode<S,B>& update       ( const typename MX::RandVarType& ) ;
+  // Read / write access to entry i of frame t; t is mapped to a slot with t%LOOP_LENGTH.
+  const TrellNode<S,B>& getTrellNode ( Frame t, Index i ) { return aatnTrellis.get(t%LOOP_LENGTH,i); }
+  TrellNode<S,B>&       setTrellNode ( Frame t, Index i ) { return aatnTrellis.set(t%LOOP_LENGTH,i); }
+  // The declarations below are commented out and therefore not part of the class.
+ /*
+  void updateSerial ( const typename MX::RandVarType& ) ;
+  void updatePara   ( const typename MX::RandVarType& ) ;
+  void each         ( const typename MX::RandVarType&, Beam<LogProb,S,IB>&, SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> >& ) ;
+  // Extraction methods...
+  const TrellNode<S,B>& getTrellNode ( int i ) const { return aatnTrellis.get(frameLast,i); }
+  int getBeamUsed ( int ) const ;
+  // Input / output methods...
+  void writeMLS  ( FILE* ) const ;
+  void writeMLS  ( FILE*, const S& ) const ;
+  double getCurrSum(int) const;
+  void writeCurr ( FILE*, int ) const ;
+  void writeCurrSum ( FILE*, int ) const ;
+  void gatherElementsInBeam( SafeArray1D<Id<int>,pair<S,LogProb> >* result, int f ) const;
+  void writeCurrEntropy ( FILE*, int ) const;
+  //void writeCurrDepths ( FILE*, int ) const;
+  void writeFoll ( FILE*, int, int, const typename MX::RandVarType& ) const ;
+  void writeFollRanked ( FILE*, int, int, const typename MX::RandVarType&, bool ) const ;
+  std::list<string> getMLS() const;
+  std::list<TrellNode<S,B> > getMLSnodes() const;
+  std::list<string> getMLS(const S&) const;
+  std::list<TrellNode<S,B> > getMLSnodes(const S&) const;
+ */
+};
+// Definitions of the static verbosity flags (one per template instantiation).
+template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_QUIET     = false;
+template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_NOISY     = false;
+template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_VERYNOISY = false;
+//template <class MY, class MX, class S, class B> int  HMMLoop<MY,MX,S,B>::BEAM_WIDTH       = 1;
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+// Constructor. Besides initialising the object it performs the entire run:
+//   1. reads every file named in apsArgs[1..nArgs-1], offering each line to
+//      modY and modX (the process exits with code 1 if a file cannot be opened);
+//   2. allocates a LOOP_LENGTH x BEAM_WIDTH trellis and seeds entry 0 of the
+//      starting frame with sInit;
+//   3. reads observations from stdin, calls update() for each one and prints
+//      the returned node to cout.
+// Parameters:
+//   nArgs, apsArgs - argument count / vector; index 0 is skipped.
+//   w              - beam width (stored in BEAM_WIDTH).
+//   l              - number of frames kept (stored in LOOP_LENGTH).
+//   sInit          - state id of the single initial trellis entry.
+// All progress and error messages go to cout.
+template <class MY, class MX, class S, class B>
+HMMLoop<MY,MX,S,B>::HMMLoop ( int nArgs, const char* apsArgs[], int w, int l, const S& sInit ) : BEAM_WIDTH(w), LOOP_LENGTH(l) {
+  // For each model file in command line arguments...
+  for ( int iArg=1; iArg<nArgs; iArg++ ) {
+    // Try to open model file...
+    FILE* pf = fopen(apsArgs[iArg],"r");
+    // Complain if can't open model file...
+    if ( NULL == pf ) {
+      cout<<"ERROR: can't open file '"<<apsArgs[iArg]<<"'!\n";
+      cout<<"Terminating process with failure code 1.\n";
+      exit(1);
+    }
+    // Initialize stream buffer and line number...
+    IStreamSource iss(pf);
+    int linenum=0;
+    cout<<"Reading file '"<<apsArgs[iArg]<<"'...\n";
+    // For each line of input...
+    // (presumably a default-constructed IStream marks a failed or exhausted
+    //  read -- confirm against the IStream definition)
+    for ( IStream is(iss),is1; IStream()!=is; is=is1,iss.compress() ) {
+      // Increment line number...
+      linenum++;
+      // Count off every 100K lines...
+      if (linenum%100000==0) cout<<"  Reading line "<<linenum<<"...\n";
+      // Try to read each line into each model...
+      // The attempts are tried in order ('#' comment, modY, modX); the error
+      // is printed only when all three compare equal to IStream() and the line
+      // can still be read as a plain string s.
+      String s;
+      if ( (is1=(is>>"#">>s>>"\n")) == IStream() &&
+           (is1=(is>>modY>>  "\n")) == IStream() &&
+           (is1=(is>>modX>>  "\n")) == IStream() &&
+           (is1=(is>>s   >>  "\n")) != IStream() )
+        // Complain if bad format...
+        cout<<"  ERROR in '"<<apsArgs[iArg]<<"', line "<<linenum<<": can't process '"<<s<<"'!\n";
+    }
+    cout<<"Done reading file '"<<apsArgs[iArg]<<"'.\n";
+    fclose(pf);
+  }
+  cout<<"Done reading all model files.\n";
+  //modY.dump(cout,"Y");
+  //modX.dump(cout,"X");
+  // Alloc trellis...
+  aatnTrellis.init(LOOP_LENGTH,BEAM_WIDTH);
+  // Frame numbering starts at LOOP_LENGTH, which maps to slot 0 under t%LOOP_LENGTH.
+  frameLast=LOOP_LENGTH;
+  // Set initial element at first time slice...
+  setTrellNode(frameLast,0) = TrellNode<S,B> ( Index(0), sInit, B(), 0 ) ;
+  cout<<"Begin processing input...\n";
+  IStreamSource iss(stdin);
+  typename MX::RandVarType x;
+  // For each frame...
+  for ( IStream is(iss); is!=IStream(); iss.compress() ) {
+//    // Show beam...
+//    cout<<"-----BEAM:t="<<frameLast-LOOP_LENGTH<<"-----\n";
+//    for(int i=0;i<BEAM_WIDTH;i++)
+//      cout<<getTrellNode(frameLast,i)<<"\n";
+//    cout<<"--------------\n";
+    // Read spectrum (as frame audio)...
+    // NOTE(review): update(x) below runs even if this read just failed, since
+    // the loop condition is only re-checked afterwards -- confirm whether a
+    // spurious final frame can result.
+    is=is>>x;
+//    // Show spectrum...
+//    cout<<frameLast-2*LOOP_LENGTH+1<<" "<<x<<"\n";
+//    // Show spectrum with bin numbers...
+//    cout<<frameLast-2*LOOP_LENGTH+1;
+//    for(int i=0; i<NUM_FREQUENCIES; i++)
+//      cout<<((i==0)?' ':',')<<i<<":"<<x.get(i);
+//    cout<<"\n";
+    // Update trellis...
+    const TrellNode<S,B>& tn = update(x);
+    // Show recognized hidden variable values...
+    // update() returns a node of frame frameLast-LOOP_LENGTH+1; subtracting the
+    // starting frame number (LOOP_LENGTH) gives the label printed here.
+    cout<<frameLast-2*LOOP_LENGTH+1<<":'"<<tn<<"'\n";
+    cout.flush();
+  }
+  cout<<"Done processing input.\n";
+}
+////////////////////////////////////////////////////////////////////////////////
+// Ordering predicate handed to the Heap in HMMLoop::update: true when the
+// LogProb held in a1.third is strictly greater than the one in a2.third.
+template <class A, class B>
+inline bool outRank ( const quad<A,B,LogProb,Id<int> >& a1,
+                      const quad<A,B,LogProb,Id<int> >& a2 ) {
+  return ( a1.third > a2.third );
+}
+// update: advance the trellis by one frame for observation x.
+//
+// Candidate transitions are explored best-first through a heap of SHPI quads:
+//   first  - index of the source entry in the previous frame,
+//   second - MY::IterVal describing the (partially chosen) destination,
+//   third  - LogProb score of the candidate,
+//   fourth - variable position at which the candidate was forked off
+//            (a value of MY::IterVal::NUM_ITERS marks a complete candidate).
+// Complete candidates are moved from the heap into a beam of BEAM_WIDTH
+// entries until the beam reports full or the heap empties. The sorted beam is
+// then written into frame frameLast, keeping only entries whose back-pointer
+// chain leads to the same entry of frame frameLast-LOOP_LENGTH+1 as the best
+// entry does; the remaining slots are reset to default nodes.
+//
+// Returns a reference to that shared origin node in frame frameLast-LOOP_LENGTH+1.
+template <class MY, class MX, class S, class B>
+const TrellNode<S,B>& HMMLoop<MY,MX,S,B>::update ( const typename MX::RandVarType& x ) {
+  // Increment frame counter...
+  frameLast++;
+  // Init beam for new frame...
+  Beam<LogProb,S,IB> btn(BEAM_WIDTH);
+  SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> > atnSorted (BEAM_WIDTH);
+  typedef quad<int,typename MY::IterVal,LogProb,Id<int> > SHPI;
+  Heap < SHPI, outRank<int,typename MY::IterVal > > ashpiQueue;
+  SHPI shpi, shpiTop;
+  // aCtr is assigned inline in each setIterProb call below; whether the model
+  // modifies it is not visible from this file.
+  int aCtr;
+  ashpiQueue.clear();
+  //shpi.first  = -1;
+  //shpi.second = YModel::IterVal();
+  //shpi.third  = 1.0;
+  // Seed the queue with one candidate rooted at entry 0 of the previous frame,
+  // forked at position -1.
+  shpi.first = 0;
+  shpi.third  = getTrellNode(frameLast-1,shpi.first).getScore();
+  shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=-1 );   // , x, aCtr=-1 );
+  //S s; modY.setTrellDat(s,shpi.second);
+  shpi.fourth = -1;
+  ////cerr<<"????? "<<shpi<<"\n";
+  ashpiQueue.enqueue(shpi);
+  bool bFull=false;
+  // For each ranked value of transition destination...
+  for ( int iTrg=0; !bFull && ashpiQueue.getSize()>0; iTrg++ ) {
+    // Iterate A* (best-first) search until a complete path is at the top of the queue...
+    while ( ashpiQueue.getSize() > 0 && ashpiQueue.getTop().fourth < MY::IterVal::NUM_ITERS ) {
+      // Remove top...
+      shpiTop = ashpiQueue.dequeueTop();
+      // Fork off (try to advance each elementary variable a)...
+      for ( int a=shpiTop.fourth.toInt(); a<=MY::IterVal::NUM_ITERS; a++ ) {
+        // Copy top into new queue element...
+        shpi = shpiTop;
+        // At variable position -1, advance beam element for transition source...
+        // NOTE(review): no bound check on shpi.first is visible here; presumably
+        // entries past the end score LogProb() or the array access is guarded -- confirm.
+        if ( a == -1 ) shpi.first++;
+        // Incorporate prob from transition source...
+        shpi.third = getTrellNode(frameLast-1,shpi.first).getScore();
+        // Candidates whose score is not above a default-constructed LogProb are dropped.
+        if ( shpi.third > LogProb() ) {
+          // Try to advance variable at position a and return probability (subsequent variables set to first, probability ignored)...
+          shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=a );   // , x, aCtr=a );
+          // At end of variables, incorporate observation probability...
+          if ( a == MY::IterVal::NUM_ITERS && shpi.fourth != MY::IterVal::NUM_ITERS )
+            shpi.third *= modX.getProb ( x, S(shpi.second) );
+            //// { S s; modY.setTrellDat(s,shpi.second); shpi.third *= modX.getProb(x,s); }
+          // Record variable position at which this element was forked off...
+          shpi.fourth = a;
+          //cerr<<" from partial: "<<shpiTop<<"\n   to partial: "<<shpi<<"\n";
+          if ( shpi.third > LogProb() ) {
+            ////if ( frameLast == 4 ) cerr<<" from partial: "<<shpiTop<<"\n   to partial: "<<shpi<<"\n";
+            // If valid, add to queue...
+            ashpiQueue.enqueue(shpi);
+            //cerr<<"--------------------\n"<<ashpiQueue;
+          }
+        }
+      }
+      // Remove top...
+      //cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
+      //if ( ashpiQueue.getTop().fourth != MY::IterVal::NUM_ITERS ) ashpiQueue.dequeueTop();
+      ////cerr<<"/-----B-----\\\n"<<ashpiQueue<<"\\-----B-----/\n";
+      ////cerr<<ashpiQueue.getSize()<<" queue elems, "<<ashpiQueue.getTop()<<"\n";
+    }
+    ////cerr<<"-----*-----\n"<<ashpiQueue<<"-----*-----\n";
+    ////cerr<<ashpiQueue.getSize()<<" queue elems **\n";
+    // Add best transition (top of queue)...
+    //modX.getProb(o,modY.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second));
+    if ( ashpiQueue.getSize() > 0 ) {
+      S s ( ashpiQueue.getTop().second );
+      ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second);
+      // The result of tryAdd is accumulated into bFull, which ends the outer loop.
+      bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,B(ashpiQueue.getTop().second)), ashpiQueue.getTop().third );
+      ////cerr<<ashpiQueue.getSize()<<" queue elems A "<<ashpiQueue.getTop()<<"\n";
+      ////cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
+      ashpiQueue.dequeueTop();
+      ////cerr<<"/-----B-----\\\n"<<ashpiQueue<<"\\-----B-----/\n";
+      ////cerr<<ashpiQueue.getSize()<<" queue elems B "<<ashpiQueue.getTop()<<"\n";
+      //cerr<<"."; cerr.flush();
+    }
+  }
+  ////cerr<<"-----*-----\n"<<ashpiQueue<<"-----*-----\n";
+  btn.sort(atnSorted);
+  // Copy sorted beam to trellis...
+  // j counts the entries actually written into frame frameLast.
+  Index iOriginOfBest;
+  int j=0;
+  for(int i=0;i<BEAM_WIDTH;i++) {
+    const std::pair<std::pair<S,IB>,LogProb>* tn1 = &atnSorted.get(i);
+    Index iOrigin = tn1->first.second.first;
+    // Determine origin at beginning of loop...
+    // Each step replaces an index into frame t by the index of its source in
+    // frame t-1, so after the loop iOrigin indexes frame frameLast-LOOP_LENGTH+1.
+    for ( Frame t=frameLast-1; t>frameLast-LOOP_LENGTH+1; t-- )
+      iOrigin = getTrellNode(t,iOrigin).getSource();
+    if ( 0 == i ) iOriginOfBest = iOrigin;
+    // If new hypothesis has same origin, add to beam...
+    if ( iOriginOfBest == iOrigin ) {
+      setTrellNode(frameLast,j++)=TrellNode<S,B>(tn1->first.second.first,
+                                                 tn1->first.first,
+                                                 tn1->first.second.second,
+                                                 tn1->second);
+    }
+  }
+  // Clear out rest of beam...
+  for ( ; j<BEAM_WIDTH; j++ )
+    setTrellNode(frameLast,j) = TrellNode<S,B>();
+  ////modY.update();
+  return getTrellNode(frameLast-LOOP_LENGTH+1,iOriginOfBest);
+}
+#endif //_NL_HMMLOOP_

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-iomacros.h ADDED Viewed

	@@ -0,0 +1,63 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef NL_IO_MACROS__
+#define NL_IO_MACROS__
+// Character-at-a-time input macros. Common argument names:
+//   f - input source (a FILE* for the getc forms, a socket descriptor for the
+//       *_SOCKET forms); the *_STDIN forms read with getchar() instead
+//   c - variable holding the current (look-ahead) character
+//   b - condition, normally written in terms of c
+//   s - destination character buffer (or error text for CONSUME_ONE*)
+//   i - index / numeric accumulator variable
+//   l - line counter; it is incremented whenever the character being replaced
+//       (the old value of c) is '\n'
+// Macro arguments are not parenthesised in the expansions, so pass simple
+// expressions only.
+// The *_SOCKET macros need recv() and MSG_WAITALL; the socket headers below
+// are commented out, so the including file must provide them.
+//#include <sys/types.h>
+//#include <sys/socket.h>
+//#include <netinet/in.h>
+//#include <netdb.h>
+// Character-class tests.
+#define NUM(c)                   ((c>='0' && c<='9'))
+#define ALPHANUM(c)              ((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9'))
+#define SPACE(c)                 (c==' ')
+#define WHITESPACE(c)            (c==' ' || c=='\t' || c=='\n')
+// CONSUME_OPT: if b holds, replace c by the next character; otherwise leave c alone.
+#define CONSUME_OPT(f,c,b,l)     c = (b) ? getc(f)+0*(l+=(c=='\n')?1:0) : c
+// CONSUME_ONE / CONSUME_ONE_STDIN: as CONSUME_OPT, but when b does not hold the
+// message s and line l are reported on stderr (c itself is left unchanged).
+#define CONSUME_ONE(f,c,b,s,l)   c = (b) ? getc(f)+0*(l+=(c=='\n')?1:0) : c+(0*fprintf(stderr,"\nERROR: %s in line %d (char=%c).\n\n",s,l,c))
+#define CONSUME_ONE_STDIN(c,b,s,l) c = (b) ? getchar()+0*(l+=(c=='\n')?1:0) : c+(0*fprintf(stderr,"\nERROR: %s in line %d.\n\n",s,l))
+// CONSUME_ALL*: loop header that keeps reading characters while b holds.
+#define CONSUME_ALL(f,c,b,l)     for ( ; b; c=getc(f)+0*(l+=(c=='\n')?1:0) )
+#define CONSUME_ALL_STDIN(c,b,l) for ( ; b; c=getchar()+0*(l+=(c=='\n')?1:0) )
+// CONSUME_STR*: loop header that copies characters into s while b holds. When
+// b fails, a '\0' terminator is stored and the loop ends, leaving i one past
+// the terminator. No bound on i is enforced.
+#define CONSUME_STR(f,c,b,s,i,l)      for ( i=0; (b) || false != (s[i++]='\0'); s[i++]=c, c=getc(f)+0*(l+=(c=='\n')?1:0) )
+// CONSUME_STR_SAFE: as CONSUME_STR, but also stops once i reaches m-1.
+// NOTE(review): when the i<m-1 test ends the loop, the terminator store is
+// short-circuited, so s is left without a '\0' in that case.
+#define CONSUME_STR_SAFE(f,c,b,s,i,m,l) for ( i=0; i<m-1&&((b)||false!=(s[i++]='\0')); s[i++]=c, c=getc(f)+0*(l+=(c=='\n')?1:0) )
+#define CONSUME_STR_STDIN(c,b,s,i,l)  for ( i=0; (b) || false != (s[i++]='\0'); s[i++]=c, c=getchar()+0*(l+=(c=='\n')?1:0) )
+// CONSUME_INT*: accumulate a run of decimal digits into i (starting from 0).
+#define CONSUME_INT(f,c,i,l)     for ( i=0; (c>='0' && c<='9'); i=(i*10)+(c-'0'), c=getc(f)+0*(l+=(c=='\n')?1:0) )
+#define CONSUME_INT_STDIN(c,i,l) for ( i=0; (c>='0' && c<='9'); i=(i*10)+(c-'0'), c=getchar()+0*(l+=(c=='\n')?1:0) )
+// CONSUME_DEC: add a run of digits to i as successive decimal places, using j
+// as the running divisor (10, 100, ...). i is not reset here.
+// NOTE(review): presumably i and j are floating-point; with an integer j the
+// division (c-'0')/j would truncate to 0 -- confirm at call sites.
+#define CONSUME_DEC(f,c,i,j,l)   for ( j=1; (c>='0' && c<='9'); j*=10, i+=(c-'0')/j, c=getc(f)+0*(l+=(c=='\n')?1:0) )
+// CONSUME_HEX: accumulate hexadecimal digits into i; only 0-9 and lower-case
+// a-f are accepted.
+#define CONSUME_HEX(f,c,i,l)     for ( i=0; (c>='0' && c<='9') || (c>='a' && c<='f'); i=(i*16)+((c<'a')?c-'0':c+10-'a'), c=getc(f)+0*(l+=(c=='\n')?1:0) )
+// Socket variants: read one byte at a time with recv(); c becomes '\0' when
+// recv() does not deliver exactly one byte.
+#define CONSUME_ALL_SOCKET(f,c,b,l)     for ( char s[1]; b; c=((recv(f,&s[0],1,MSG_WAITALL)==1) ? s[0]+0*(l+=(c=='\n')?1:0) : '\0') )
+#define CONSUME_STR_SOCKET(f,c,b,s,i,l) for ( i=0; (b) || ('\0'!=(s[i++]='\0')); s[i++]=c, c=((recv(f,&s[i],1,MSG_WAITALL)==1) ? s[i]+0*(l+=(c=='\n')?1:0) : (s[i]='\0')) )
+//#define CONSUME_ALL_STRING(f,c,b,l)     for ( int ii=0; b && f[ii]!='\0'; c=f[ii]+0*(l+=(c=='\n')?1:0), ii++ )
+//#define CONSUME_STR_STRING(f,c,b,s,i,l) for ( i=0; (b && f[i]!='\0') || false != (s[i++]='\0'); s[i++]=c, c=f[i]+0*(l+=(c=='\n')?1:0) )
+#endif //NL_IO_MACROS__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-list.h ADDED Viewed

	@@ -0,0 +1,481 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_LIST_ //////////////////////////////////////////////////////////////
+#define _NL_LIST_ //////////////////////////////////////////////////////////////
+#include <cstdlib>
+// Listed(x): shorthand for the node type ListedObject<x> used by List<x>.
+#define Listed(x) ListedObject<x>
+////////////////////////////////////////////////////////////////////////////////
+//
+//  Container macros
+//
+//  Each macro expands to a `for` header; the statement that follows is the
+//  loop body. Arguments:
+//    p - pointer variable that walks the container (NULL once the walk ends)
+//    c - container offering getNext(pointer); getNext(NULL) yields the first element
+//    y - bool variable receiving the result (setifall / setifexists only)
+//    x - condition evaluated once per element, after the loop body
+//
+////////////////////////////////////////////////////////////////////////////////
+// Standard loop...
+// Visits every element of c in order.
+#define foreach(p,c) for ( p=(c).getNext(NULL); p!=NULL; p=(c).getNext(p) )
+// True unless proven false...
+// y starts true and the walk stops as soon as x is false for some element.
+#define setifall(y,p,c,x) for ( p=(c).getNext(NULL), y=true; p!=NULL && y; y &= (x), p=(c).getNext(p) )
+// False unless proven true...
+// y starts false and the walk stops as soon as x is true for some element.
+#define setifexists(y,p,c,x) for ( p=(c).getNext(NULL), y=false; p!=NULL && !y; y |= (x), p=(c).getNext(p) )
+////////////////////////////////////////////////////////////////////////////////
+// Ptr<T>: thin wrapper around a raw T*. It never deletes the pointee (no
+// destructor is defined) and compares by address.
+template <class T>
+class Ptr
+  {
+  private:
+    T* ptObj ;   // wrapped address; NULL after default construction
+  public:
+    // Construction: null, from a raw pointer, from an object (its address is
+    // stored), or as a copy of another Ptr.
+    Ptr ( )                  : ptObj ( NULL )     { }
+    Ptr ( T* pt )            : ptObj ( pt )       { }
+    Ptr ( T& t  )            : ptObj ( &t )       { }
+    Ptr ( const Ptr<T>& pt ) : ptObj ( pt.ptObj ) { }
+    // Relational and equality operators compare the stored addresses, not the
+    // objects pointed to.
+    bool operator>  ( const Ptr<T>& pt ) const { return ( ptObj >  pt.ptObj ) ; }
+    bool operator<  ( const Ptr<T>& pt ) const { return ( ptObj <  pt.ptObj ) ; }
+    bool operator>= ( const Ptr<T>& pt ) const { return ( ptObj >= pt.ptObj ) ; }
+    bool operator<= ( const Ptr<T>& pt ) const { return ( ptObj <= pt.ptObj ) ; }
+    bool operator== ( const Ptr<T>& pt ) const { return ( ptObj == pt.ptObj ) ; }
+    bool operator!= ( const Ptr<T>& pt ) const { return ( ptObj != pt.ptObj ) ; }
+    // Assignment re-targets this wrapper at the other wrapper's address.
+    Ptr<T>& operator= ( const Ptr<T>& pt )
+      {
+      ptObj = pt.ptObj ;
+      return *this ;
+      }
+    // Dereference operators; no NULL check is performed.
+    T& operator*  ( ) const { return *ptObj ; }
+    T* operator-> ( ) const { return ptObj ; }
+  } ;
+////////////////////////////////////////////////////////////////////////////////
+// Forward declaration of the node type (a T plus a link pointer).
+template <class T>
+class ListedObject ;
+// List<T>: singly linked circular list of heap-allocated ListedObject<T>
+// nodes. Only the last node is stored; the first node is plotLast->plotNext,
+// and an empty list has plotLast == NULL.
+template <class T>
+class List {
+ private:
+  ListedObject<T>* plotLast ;
+ public:
+  typedef ListedObject<T>*       iterator;
+  typedef const ListedObject<T>* const_iterator;
+  // Constructor and destructor methods...
+  // Empty list; one-element list; copy; concatenation of two lists.
+  List  ( ) ;
+  List  ( const T& ) ;
+  List  ( const List<T>& ) ;
+  List  ( const List<T>&, const List<T>& ) ;
+  ~List ( ) ;
+  // Overloaded operators...
+  // operator+= appends copies of the argument's elements; the comparisons are element-wise.
+  List<T>& operator=  ( const List<T>& ) ;
+  List<T>& operator+= ( const List<T>& ) ;
+  bool     operator== ( const List<T>& ) const ;
+  bool     operator!= ( const List<T>& ) const ;
+  // Specification methods...
+  // insert(p): new element after p (at the front when p is NULL).
+  // add(): new element at the back.  push(): new element at the front.
+  // pop(): removes the front element.  Each creator returns the new element as a T&.
+  // NOTE(review): remove() is declared here, but its only definition in this
+  // file is commented out, so calling it would fail to link.
+  void     clear   ( ) ;
+  T&       insert  ( Listed(T)* ) ;
+  void     remove  ( Listed(T)* ) ;
+  T&       add     ( ) ;
+  T&       push    ( ) ;
+  void     pop     ( ) ;
+  Listed(T)* setFirst ( ) ;
+  Listed(T)* setNext  ( Listed(T)* ) ;
+  // Extraction methods...
+  // begin()/end() expose the same traversal as getNext(): first element, then NULL at the end.
+  const_iterator begin ( ) const { return getNext(NULL); }
+  const_iterator end   ( ) const { return NULL; }
+  // NOTE(review): inside List<T>, *this is a List, not an iterator, so this
+  // body does not look type-correct; presumably it compiles only because it
+  // is never instantiated -- confirm before relying on it.
+  iterator& operator++ ( ) { *this=getNext(*this); return *this; }
+  int        getCard  ( ) const ;
+  Listed(T)* getFirst ( ) const ;
+  Listed(T)* getSecond( ) const ;
+  Listed(T)* getLast  ( ) const ;
+  Listed(T)* getNext  ( const Listed(T)* ) const ;
+  bool       contains ( const T& ) const ;
+  bool       isEmpty  ( ) const ;
+/*   // Input / output methods... */
+/*   friend IStream operator>> ( pair<IStream,List<T>*> is_x, const char* psDlm ) { */
+/*     IStream& is =  is_x.first; */
+/*     List<T>& x  = *is_x.second; */
+/*     if (IStream()!=is) */
+/*       is = pair<IStream,T*>(is,&x.add())>>psDlm; */
+/*     return is; */
+/*   } */
+} ;
+////////////////////////////////////////////////////////////////////////////////
+// ListedObject<T>: a T extended with the link pointer that List<T> uses to
+// chain its elements. Copying or assigning a ListedObject transfers only the T
+// payload, never the link.
+template <class T>
+class ListedObject : public T
+  {
+  friend class List<T> ;
+  private:
+    // Successor link maintained by List<T>; NULL while the node is unlinked.
+    ListedObject<T>* plotNext ;
+  public:
+    // Raw successor link (no end-of-list handling; List<T>::getNext does that).
+    const ListedObject<T>* next ( ) const { return plotNext; }
+    // A fresh node is unlinked.
+    ListedObject ( ) { plotNext = NULL; }
+    // Copy construction: the T part is default-constructed and then assigned
+    // from lot, as before; the link is now set to NULL instead of being left
+    // uninitialized, so next() on a copy no longer returns garbage.
+    ListedObject ( const ListedObject<T>& lot )
+      { plotNext = NULL; T::operator=(lot); }
+    // Assignment copies the payload and deliberately keeps this node's own
+    // link, so an element can be overwritten while it stays in its list.
+    ListedObject<T>& operator= ( const ListedObject<T>& lot )
+      { T::operator=(lot); return(*this); }
+    operator T() { return *this; }
+  } ;
+////////////////////////////////////////////////////////////////////////////////
+// Default constructor: an empty list is represented by a NULL last-node pointer.
+template <class T>
+List<T>::List ( )
+  : plotLast ( NULL )
+  {
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Single-element constructor: starts empty, appends one element and assigns t to it.
+template <class T>
+List<T>::List ( const T& t )
+  : plotLast ( NULL )
+  {
+  T& tAppended = add() ;
+  tAppended = t ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Copy constructor: starts empty and appends a copy of every element of lt, in order.
+template <class T>
+List<T>::List ( const List<T>& lt )
+  : plotLast ( NULL )
+  {
+  for ( ListedObject<T>* plotSrc = lt.getNext(NULL); plotSrc != NULL; plotSrc = lt.getNext(plotSrc) )
+    add() = *plotSrc ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Concatenating constructor: the new list holds copies of lt1's elements
+// followed by copies of lt2's elements.
+template <class T>
+List<T>::List ( const List<T>& lt1, const List<T>& lt2 )
+  : plotLast ( NULL )
+  {
+  ListedObject<T>* plotSrc ;
+  // First the elements of lt1...
+  for ( plotSrc = lt1.getNext(NULL); plotSrc != NULL; plotSrc = lt1.getNext(plotSrc) )
+    add() = *plotSrc ;
+  // ...then the elements of lt2.
+  for ( plotSrc = lt2.getNext(NULL); plotSrc != NULL; plotSrc = lt2.getNext(plotSrc) )
+    add() = *plotSrc ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Destructor: releases every node by delegating to clear().
+template <class T>
+List<T>::~List ( )
+  {
+  this->clear ( ) ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// clear: deletes every node and leaves the list empty.
+template <class T>
+void List<T>::clear ( )
+  {
+  if ( NULL != plotLast )
+    {
+    // Walk the ring once, starting at the last node; the successor is read
+    // before the current node is deleted, and the walk stops when it comes
+    // back around to the starting address.
+    ListedObject<T>* plotCurr = plotLast ;
+    do
+      {
+      ListedObject<T>* plotFollowing = plotCurr->plotNext ;
+      delete plotCurr ;
+      plotCurr = plotFollowing ;
+      }
+    while ( plotLast != plotCurr ) ;
+    }
+  plotLast = NULL ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Assignment: replaces this list's contents with copies of lt's elements, in
+// order, and returns *this.
+//
+// Self-assignment is a no-op. Previously the contents were destroyed first
+// and then "copied" from the now-empty list, so `a = a` emptied the list.
+// The old contents are released with clear() rather than an explicit
+// destructor call, which ended the object's lifetime while it was still in use.
+template <class T>
+List<T>& List<T>::operator= ( const List<T>& lt )
+  {
+  if ( this == &lt ) return *this ;
+  Listed(T)* pt ;
+  clear ( ) ;
+  foreach ( pt, lt )
+    add() = *pt ;
+  return *this ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Append: adds copies of lt's elements, in order, to the back of this list
+// and returns *this.
+//
+// The first and last source nodes are captured before anything is appended,
+// and the walk stops after copying that last node. This makes `a += a` append
+// one copy of the original contents; previously the traversal chased the
+// growing tail and never terminated. For a distinct lt the visited elements
+// are the same as before.
+template <class T>
+List<T>& List<T>::operator+= ( const List<T>& lt )
+  {
+  Listed(T)* const plotStop = lt.getLast ( ) ;
+  // Nothing to append from an empty source.
+  if ( NULL == plotStop ) return *this ;
+  Listed(T)* pt = lt.getFirst ( ) ;
+  for ( ; ; pt = pt->plotNext )
+    {
+    add() = *pt ;
+    if ( pt == plotStop ) break ;
+    }
+  return *this ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Equality: true when both lists have the same number of elements and the
+// elements at matching positions compare equal.
+template <class T>
+bool List<T>::operator== ( const List<T>& lt ) const
+  {
+  Listed(T)* plotMine  = getNext(NULL) ;
+  Listed(T)* plotOther = lt.getNext(NULL) ;
+  // Step through both lists together until either one runs out.
+  while ( plotMine != NULL && plotOther != NULL )
+    {
+    if ( !(*plotMine == *plotOther) ) return false ;
+    plotMine  = getNext(plotMine) ;
+    plotOther = lt.getNext(plotOther) ;
+    }
+  // Equal only if both ran out at the same time.
+  return ( plotMine == NULL && plotOther == NULL ) ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// Inequality: the negation of operator==.
+template <class T>
+bool List<T>::operator!= ( const List<T>& lt ) const
+  {
+  const bool bSame = ( *this == lt ) ;
+  return !bSame ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// insert: allocates a new node and links it directly after plotPrev, or at the
+// front of the list when plotPrev is NULL. Returns the new element as a T&.
+template <class T>
+T& List<T>::insert ( Listed(T)* plotPrev )
+  {
+  ListedObject<T>* plotNew = new ListedObject<T> ;
+  // Empty list and no predecessor: the new node becomes a one-element ring.
+  if ( NULL == plotPrev && NULL == plotLast )
+    {
+    plotNew->plotNext = plotNew ;
+    plotLast = plotNew ;
+    return *plotNew ;
+    }
+  // Otherwise splice after the given predecessor, or after the last node
+  // (i.e. at the front of the ring) when none was given.
+  ListedObject<T>* plotAnchor = ( NULL != plotPrev ) ? plotPrev : plotLast ;
+  plotNew->plotNext = plotAnchor->plotNext ;
+  plotAnchor->plotNext = plotNew ;
+  // Inserting after the current last node makes the new node the last one.
+  if ( NULL != plotPrev && plotLast == plotPrev )
+    plotLast = plotNew ;
+  return *plotNew ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+/* DON'T KNOW WHY THIS DOESN'T WORK
+template <class T>
+void List<T>::remove ( Listed(T)* plot )
+  {
+  assert ( plot );
+  assert ( plotLast );
+  // If only one element...
+  if ( plot->plotNext == plot )
+    {
+    assert ( plotLast == plot );
+    plotLast = NULL;
+fprintf(stderr,"list::delete1 %x\n",plot);
+    delete plot;
+    }
+  // If more than one element...
+  else
+    {
+    if ( plotLast == plot->plotNext ) plotLast = plot;
+    Listed(T)* plotTemp = plot->plotNext;
+    *plot = *(plot->plotNext);
+fprintf(stderr,"list::delete2 %x\n",plotTemp);
+    delete plotTemp;
+    }
+  }
+*/
+////////////////////////////////////////////////////////////////////////////////
+// add: allocates a new node, appends it at the back of the list and returns
+// the new element as a T&.
+template <class T>
+T& List<T>::add ( )
+  {
+  ListedObject<T>* plotNew = new ListedObject<T> ;
+  if ( NULL == plotLast )
+    {
+    // First element: it is its own successor.
+    plotNew->plotNext = plotNew ;
+    }
+  else
+    {
+    // Link in after the current last node, keeping the ring closed.
+    plotNew->plotNext = plotLast->plotNext ;
+    plotLast->plotNext = plotNew ;
+    }
+  // Either way the new node is now the last one.
+  plotLast = plotNew ;
+  return *plotNew ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// push: allocates a new node, places it at the front of the list and returns
+// the new element as a T&.
+template <class T>
+T& List<T>::push ( )
+  {
+  ListedObject<T>* plotNew = new ListedObject<T> ;
+  if ( NULL == plotLast )
+    {
+    // First element: a one-element ring that is also the last node.
+    plotNew->plotNext = plotNew ;
+    plotLast = plotNew ;
+    }
+  else
+    {
+    // Splice in right after the last node, which is the front position;
+    // the last node itself does not change.
+    plotNew->plotNext = plotLast->plotNext ;
+    plotLast->plotNext = plotNew ;
+    }
+  return *plotNew ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// pop: removes and deletes the first element of the list.
+//
+// Calling pop() on an empty list is now a no-op; previously it dereferenced
+// the NULL plotLast pointer.
+template <class T>
+void List<T>::pop ( )
+  {
+  if ( NULL == plotLast ) return ;
+  // The first element is the successor of the last one.
+  ListedObject<T>* plot = plotLast->plotNext ;
+  if ( plot->plotNext == plot )
+    // Sole element: the list becomes empty.
+    plotLast = NULL ;
+  else
+    // Bypass the first element so the ring stays closed.
+    plotLast->plotNext = plot->plotNext ;
+  ////fprintf(stderr,"list::pop %x\n",plot);
+  delete plot ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// getCard: returns the number of elements, found by walking the whole list.
+template <class T>
+int List<T>::getCard ( ) const
+  {
+  int iCount = 0 ;
+  for ( Listed(T)* plotCurr = (*this).getNext(NULL); plotCurr != NULL; plotCurr = (*this).getNext(plotCurr) )
+    ++iCount ;
+  return iCount ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// setFirst: non-const access to the first node, or NULL when the list is empty.
+template <class T>
+ListedObject<T>* List<T>::setFirst ( )
+  {
+  if ( NULL == plotLast ) return NULL ;
+  return plotLast->plotNext ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// setNext: non-const traversal step. A NULL argument yields the first node of
+// a non-empty list; the last node (or NULL on an empty list) yields NULL;
+// any other node yields its successor.
+template <class T>
+ListedObject<T>* List<T>::setNext ( ListedObject<T>* plot )
+  {
+  if ( NULL == plot && NULL != plotLast ) return plotLast->plotNext ;
+  if ( plot != plotLast ) return plot->plotNext ;
+  return NULL ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// getFirst: the first node (the successor of the last node), or NULL when the
+// list is empty.
+template <class T>
+ListedObject<T>* List<T>::getFirst ( ) const
+  {
+  if ( NULL == plotLast ) return NULL ;
+  return plotLast->plotNext ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// getSecond: the result of taking one getNext() step from the first node.
+template <class T>
+ListedObject<T>* List<T>::getSecond ( ) const
+  {
+  ListedObject<T>* plotHead = getFirst ( ) ;
+  return getNext ( plotHead ) ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// getLast: the last node, or NULL when the list is empty (plotLast already
+// has exactly that value in both cases).
+template <class T>
+ListedObject<T>* List<T>::getLast ( ) const
+  {
+  return plotLast ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// getNext: traversal step used by the foreach-style macros. A NULL argument
+// yields the first node of a non-empty list; the last node (or NULL on an
+// empty list) yields NULL; any other node yields its successor.
+template <class T>
+ListedObject<T>* List<T>::getNext ( const ListedObject<T>* plot ) const
+  {
+  if ( NULL == plot && NULL != plotLast ) return plotLast->plotNext ;
+  if ( plot != plotLast ) return plot->plotNext ;
+  return NULL ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// contains: true when some element compares equal to t (linear search that
+// stops at the first match).
+template <class T>
+bool List<T>::contains ( const T& t ) const
+  {
+  for ( ListedObject<T>* plotCurr = (*this).getNext(NULL); plotCurr != NULL; plotCurr = (*this).getNext(plotCurr) )
+    {
+    if ( t == *plotCurr ) return true ;
+    }
+  return false ;
+  }
+////////////////////////////////////////////////////////////////////////////////
+// isEmpty: true when the list holds no elements, i.e. there is no last node.
+template <class T>
+bool List<T>::isEmpty ( ) const
+  {
+  const bool bNoElements = ( NULL == plotLast ) ;
+  return bNoElements ;
+  }
+#endif //_NL_LIST_ /////////////////////////////////////////////////////////////

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-matrix.h ADDED Viewed

	@@ -0,0 +1,177 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+// Two-level hash matrix: rows are keyed by I, the cells of a row are keyed by J, and each cell holds a P.
+// Column keys are consistently typed as J (they were previously copied into variables of type I).
+template<class I,class J,class P>
+class SparseMatrix : public SimpleHash<I,SimpleHash<J,P> > {
+ public:
+  typedef SimpleHash<I,SimpleHash<J,P> > Parent;
+ private:
+  // Shorthand for one stored row and for the iterators that walk rows / cells.
+  typedef SimpleHash<J,P>                    MatRow;
+  typedef typename Parent::const_iterator    MatRowIter;
+  typedef typename MatRow::const_iterator    MatColIter;
+ public:
+  //// Matrix / vector operator methods...
+  // Matrix product: out(i,j) += a(i,k) * b(k,j) for every stored pair whose factors are both non-zero.
+  // A column key k of 'a' is used as a row key of 'b', so J must be convertible to I.
+  friend SparseMatrix<I,J,P> operator* ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i    = iit->first;
+      const MatRow& rowA = a.get(i);                 // looked up once per row instead of once per access
+      for ( MatColIter kit=rowA.begin(); kit!=rowA.end(); ++kit ) {
+        const J&      k    = kit->first;
+        const MatRow& rowB = b.get(k);
+        for ( MatColIter jit=rowB.begin(); jit!=rowB.end(); ++jit ) {
+          const J& j = jit->first;
+          if ( kit->second!=0 && jit->second!=0 )
+            mOut.set(i).set(j) += kit->second * jit->second;
+        }
+      }
+    }
+    return mOut;
+  }
+  // Element-wise sum: copies every stored cell of 'a', then adds every stored cell of 'b'.
+  friend SparseMatrix<I,J,P> operator+ ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = a.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) = jit->second;
+    }
+    for ( MatRowIter iit=b.begin(); iit!=b.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = b.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) += jit->second;
+    }
+    return mOut;
+  }
+  // Element-wise difference: copies every stored cell of 'a', then subtracts every stored cell of 'b'.
+  friend SparseMatrix<I,J,P> operator- ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = a.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) = jit->second;
+    }
+    for ( MatRowIter iit=b.begin(); iit!=b.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = b.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) -= jit->second;
+    }
+    return mOut;
+  }
+  // Matrix + scalar operators...
+  // Adds p to every STORED cell of 'a'; cells that are not stored are not touched.
+  friend SparseMatrix<I,J,P> operator+ ( const SparseMatrix<I,J,P>& a, const P& p ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = a.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) = jit->second + p;
+    }
+    return mOut;
+  }
+  // Subtracts p from every STORED cell of 'a'; cells that are not stored are not touched.
+  friend SparseMatrix<I,J,P> operator- ( const SparseMatrix<I,J,P>& a, const P& p ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = a.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        mOut.set(i).set(jit->first) = jit->second - p;
+    }
+    return mOut;
+  }
+  // Diagonal matrix of vector...
+  // Every stored cell of 'a' must sit in column 0 (asserted); its value is accumulated into out(i,i).
+  // The row key i is reused as a column key, so I must be convertible to J.
+  friend SparseMatrix<I,J,P> diag ( const SparseMatrix<I,J,P>& a ) {
+    SparseMatrix mOut;
+    for ( MatRowIter iit=a.begin(); iit!=a.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = a.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit ) {
+        const J& j = jit->first;
+        assert(j==0); // must be vector
+        mOut.set(i).set(i) += jit->second;
+      }
+    }
+    return mOut;
+  }
+  // Scalar inf-norm (max) of matrix / vector...
+  // Returns the largest stored value, or 0 when no stored value exceeds 0.
+  // Values are compared as-is (no absolute value is taken).
+  P infnorm ( ) const {
+    P pOut = 0;  // sparse matrix assumes some values are zero, so this is default infnorm.
+    for ( MatRowIter iit=Parent::begin(); iit!=Parent::end(); ++iit ) {
+      const MatRow& row = Parent::get(iit->first);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        if ( jit->second > pOut ) pOut = jit->second;
+    }
+    return pOut;
+  }
+  // Scalar one-norm (sum) of matrix / vector...
+  // Returns the plain sum of all stored values (no absolute value is taken).
+  P onenorm ( ) const {
+    P sum=0;
+    for ( MatRowIter iit=Parent::begin(); iit!=Parent::end(); ++iit ) {
+      const MatRow& row = Parent::get(iit->first);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        sum += jit->second;
+    }
+    return sum;
+  }
+  //// Input / output methods...
+  // First half of the two-step read idiom: pairs the input position with the target matrix.
+  friend pair<StringInput,SparseMatrix<I,J,P>*> operator>> ( StringInput si, SparseMatrix<I,J,P>& m ) {
+    return pair<StringInput,SparseMatrix<I,J,P>*>(si,&m);
+  }
+  // Second half: reads one "i : j = p" cell followed by delimiter psD and stores it if the read succeeded.
+  friend StringInput operator>> ( pair<StringInput,SparseMatrix<I,J,P>*> si_m, const char* psD ) {
+    if (StringInput(NULL)==si_m.first) return si_m.first;
+    StringInput si; I i; J j; P p;                   // the column key is read as a J, not as an I
+    si=si_m.first>>i>>" : ">>j>>" = ">>p>>psD;
+    if ( si!=NULL ) si_m.second->set(i).set(j) = p;
+    return si;
+  }
+  // Writes all stored cells as comma-separated "i:j=value" items.
+  friend ostream& operator<< ( ostream& os, const SparseMatrix<I,J,P>& m ) {
+    int ctr=0;
+    for ( MatRowIter iit=m.begin(); iit!=m.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = m.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        os<<((0==ctr++)?"":",")<<i<<":"<<jit->first<<"="<<jit->second;
+    }
+    return os;
+  }
+  // Appends all stored cells to str in the same "i:j=value" format as the ostream overload.
+  // (This overload used to emit the two keys run together, with no separators and no value.)
+  friend String&  operator<< ( String& str, const SparseMatrix<I,J,P>& m ) {
+    int ctr=0;
+    for ( MatRowIter iit=m.begin(); iit!=m.end(); ++iit ) {
+      const I&      i   = iit->first;
+      const MatRow& row = m.get(i);
+      for ( MatColIter jit=row.begin(); jit!=row.end(); ++jit )
+        str<<((0==ctr++)?"":",")<<i<<":"<<jit->first<<"="<<jit->second;
+    }
+    return str;
+  }
+};

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h ADDED Viewed

	@@ -0,0 +1,126 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_MODEL_FILE__
+#define _NL_MODEL_FILE__
+#include "nl-string.h"
+#include "nl-iomacros.h"
+#include <netinet/in.h>
+static bool OUTPUT_QUIET = false;
+///////////////////////////////////////////////////////////////////////////////
+// Reads records from an already-open model file and hands each one to rF as an array of fields.
+//   pf : open input stream; a NULL stream is reported and ignored.
+//   rF : callback receiving the fields of one record; returning false triggers an error message on stderr.
+// Lines starting with '#' are skipped.  Any other line is split in place on ' ', ':' and '='.
+void processModelFilePtr ( FILE* pf, bool rF(Array<char*>&) ) {
+  if ( NULL == pf ) {                                             // Nothing to read from
+    fprintf ( stderr, "\nERROR: no open model file to read.\n\n" ) ;
+    return;
+  }
+  int i=0; int numFields=0; int c=' '; int line=1;
+  CONSUME_ALL(pf,c,WHITESPACE(c),line);                           // Get to first record
+  while ( c!=EOF ) {                                              // For each record
+    // The explicit EOF tests keep the scans from spinning on a final line that has no trailing newline.
+    if ( c=='#' ) CONSUME_ALL(pf, c, (c!=EOF && c!='\n' && c!='\0'), line ) ; //   If comment, consume
+    else {                                                        //   If no comment,
+      Array<char*> aps(100);
+      String       psBuff(1000);
+      CONSUME_STR ( pf, c, (c!=EOF && c!='\n' && c!='\0'), psBuff, i, line );
+      char* psT=NULL;
+      numFields=0;                                                //     Count the fields of this record
+      for ( char* z=strtok_r(psBuff.c_array()," :=",&psT); z!=NULL; z=strtok_r(NULL," :=",&psT) )
+        aps[numFields++]=z;
+      if ( !rF(aps) )                                             //     Try to process fields, else complain
+        fprintf( stderr, "\nERROR: %d %d-arg %s in line %d\n\n",
+                 numFields, int(aps.size()), (numFields>0)?aps[0]:"", line);
+    }
+    CONSUME_ALL(pf,c,WHITESPACE(c),line);                         //   Consume whitespace
+  }
+}
+///////////////////////////////////////////////////////////////////////////////
+// Opens the model file named ps, feeds its records to rF through processModelFilePtr, and closes it.
+// Progress messages go to stderr unless OUTPUT_QUIET is set.  If the file cannot be opened an error is
+// printed and the function returns without reading (previously it went on to use and close a NULL stream).
+void processModelFile ( const char* ps, bool rF(Array<char*>&) ) {
+  if(!OUTPUT_QUIET) fprintf ( stderr, "Reading model file %s...\n", ps ) ;
+  FILE* pf = fopen(ps,"r");
+  if ( NULL == pf ) {                                             // Complain if file not found
+    fprintf ( stderr, "\nERROR: file %s could not be opened.\n\n", ps ) ;
+    return;
+  }
+  processModelFilePtr ( pf, rF );
+  fclose(pf);
+  if(!OUTPUT_QUIET) fprintf ( stderr, "Model file %s loaded.\n", ps ) ;
+}
+///////////////////////////////////////////////////////////////////////////////
+// Reads records from socket tSockfd and hands each one to rF as an array of fields.
+//   tSockfd : socket descriptor passed to the *_SOCKET consume macros.
+//   c       : look-ahead character, shared with the caller; scanning stops when it becomes '\0' or '\5'.
+//   rF      : callback receiving the fields of one record; returning false triggers an error on stderr.
+// Lines starting with '#' are skipped.  Any other line is split in place on ' ', ':' and '='.
+void processModelSocket ( const int tSockfd, int& c, bool rF(Array<char*>&) ) {
+  int i=0; int numFields=0; int line=1;
+  CONSUME_ALL_SOCKET(tSockfd,c,WHITESPACE(c),line);                                          // Get to first record
+  while ( c!='\0' && c!='\5' ) {                                                             // For each record
+    if ( c=='#' ) CONSUME_ALL_SOCKET(tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), line ) ;   //   If comment, consume
+    else {                                                                                   //   If no comment,
+      Array<char*> aps(100);
+      String       psBuff(1000);
+      CONSUME_STR_SOCKET ( tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), psBuff, i, line );
+      ////cerr<<"|"<<psBuff.c_array()<<"|"<<endl;
+      char* psT=NULL;
+      numFields=0;                                                        //     Count the fields of this record
+      for ( char* z=strtok_r(psBuff.c_array()," :=",&psT); z!=NULL; z=strtok_r(NULL," :=",&psT) )
+        aps[numFields++]=z;
+      if ( !rF(aps) )                                                     //     Try to process fields, else complain
+        fprintf( stderr, "\nERROR: %d-arg %s in line %d\n\n", numFields, (numFields>0)?aps[0]:"", line);
+    }
+    CONSUME_ALL_SOCKET(tSockfd,c,WHITESPACE(c),line);                     //   Consume whitespace
+  }
+}
+// Convenience overload: runs the three-argument version with a local look-ahead character that starts
+// out as a blank and is discarded afterwards.
+void processModelSocket ( const int tSockfd, bool rF(Array<char*>&) ) {
+  int cLookahead = ' ';
+  processModelSocket ( tSockfd, cLookahead, rF );
+}
+///////////////////////////////////////////////////////////////////////////////
+/*
+void processModelString ( String& sBuff, bool rF(Array<char*>&) ) {
+  if ('#'!=sBuff[0]) {
+    Array<char*> aps(100);
+    char* psT=NULL;
+    for(int i=0;true;i++) {
+      char* z = strtok_r ( (0==i)?sBuff.c_array():NULL, " :=", &psT );
+      if (!z) break;
+      aps[i]=z;
+    }
+    if ( !rF(aps) )                                                     //     Try to process fields, else complain
+      fprintf( stderr, "\nERROR: %d-arg %s in line %d\n\n", numFields, aps[0], line);
+  }
+}
+*/
+///////////////////////////////////////////////////////////////////////////////
+#endif //_NL_MODEL_FILE__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-prob.h ADDED Viewed

	@@ -0,0 +1,136 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_PROB__
+#define _NL_PROB__
+#include "nl-safeids.h"
+#include <math.h>
+#include <limits.h>
+////////////////////////////////////////////////////////////////////////////////
+typedef double PDFVal;
+typedef double LogPDFVal;
+////////////////////////////////////////////////////////////////////////////////
+// Value wrapper around a single double, with compound arithmetic and text input/output helpers.
+class Prob {
+   private:
+      double gVal;
+   public:
+      // Construction: zero by default, from a double, or from text parsed with atof.
+      Prob ( )              : gVal(0.0)      { }
+      Prob (double d)       : gVal(d)        { }
+      Prob (const char* ps) : gVal(atof(ps)) { }
+      // Read access to the wrapped value.
+      operator double() const { return gVal; }
+      double toDouble() const { return gVal; }
+      // Compound arithmetic applied directly to the wrapped value.
+      Prob& operator+= ( const Prob p ) {
+        gVal += p.gVal;
+        return *this;
+      }
+      Prob& operator-= ( const Prob p ) {
+        gVal -= p.gVal;
+        return *this;
+      }
+      Prob& operator*= ( const Prob p ) {
+        gVal *= p.gVal;
+        return *this;
+      }
+      Prob& operator/= ( const Prob p ) {
+        gVal /= p.gVal;
+        return *this;
+      }
+      // Output: the wrapped value is written as a plain double.
+      friend ostream& operator<< ( ostream& os, const Prob& pr ) {
+        return os<<pr.toDouble();
+      }
+      friend String&  operator<< ( String& str, const Prob& pr ) {
+        return str<<pr.toDouble();
+      }
+      // Input, step one: pair the input position with the destination object.
+      friend pair<StringInput,Prob*> operator>> ( StringInput si, Prob& n ) {
+        return pair<StringInput,Prob*>(si,&n);
+      }
+      // Input, step two: read a double followed by the delimiter and assign it to the destination.
+      friend StringInput             operator>> ( pair<StringInput,Prob*> si_n, const char* psDlm ) {
+        double d=0.0;
+        StringInput siAfter = si_n.first>>d>>psDlm;
+        *si_n.second = Prob(d);
+        return siAfter;
+      }
+};
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  LogProb -- encapsulate min probability in sum operations
+//
+////////////////////////////////////////////////////////////////////////////////
+//#define MIN_LOG_PROB (1-MAXINT)
+#define MIN_LOG_PROB INT_MIN
+// Probability stored as an integer: int(100*ln(p)), with MIN_LOG_PROB (INT_MIN) standing for probability zero.
+// Multiplication and division of probabilities become integer addition and subtraction of the stored
+// values; addition and subtraction of probabilities are not supported and assert.
+class LogProb : public Id<int> {
+ private:
+  // Converts a linear-domain value to the stored integer.  Zero, negative and NaN inputs map to
+  // MIN_LOG_PROB, and out-of-range results are clamped, so that the double->int conversion is always
+  // defined (log(0) is -infinity, which cannot be converted to int).
+  static int scaledLogOf ( double d ) {
+    if ( !(d > 0.0) ) return MIN_LOG_PROB;
+    const double g = 100.0*log(d);
+    if ( g <= double(INT_MIN) ) return MIN_LOG_PROB;
+    if ( g >= double(INT_MAX) ) return INT_MAX;
+    return int(g);
+  }
+  // Sum of two stored values (product of probabilities).  The range is checked BEFORE adding, because
+  // signed overflow is undefined and cannot be detected reliably afterwards.
+  static int sumLogs ( int a, int b ) {
+    if ( a==MIN_LOG_PROB || b==MIN_LOG_PROB ) return MIN_LOG_PROB;   // zero times anything is zero
+    if ( a<0 && b<0 && a < INT_MIN-b ) return MIN_LOG_PROB;          // underflow: treat as zero
+    if ( a>0 && b>0 && a > INT_MAX-b ) return INT_MAX;               // overflow: saturate
+    return a+b;
+  }
+  // Difference of two stored values (quotient of probabilities), range-checked like sumLogs.
+  // A zero numerator or a zero denominator both yield MIN_LOG_PROB, as before.
+  static int diffLogs ( int a, int b ) {
+    if ( a==MIN_LOG_PROB || b==MIN_LOG_PROB ) return MIN_LOG_PROB;
+    if ( a<0 && b>0 && a < INT_MIN+b ) return MIN_LOG_PROB;          // underflow: treat as zero
+    if ( a>0 && b<0 && a > INT_MAX+b ) return INT_MAX;               // overflow: saturate
+    return a-b;
+  }
+ public:
+  // Constructor / destructor methods...
+  LogProb ( )          { set(MIN_LOG_PROB); }                // probability zero
+  LogProb ( int i )    { set(i); }                           // i is already a stored (scaled log) value
+  LogProb ( double d ) { set(scaledLogOf(d)); }              // d is a linear-domain probability
+  LogProb ( Prob   d ) { set(scaledLogOf(d.toDouble())); }   // d is a linear-domain probability
+  // Specification methods...
+  LogProb& operator+= ( const LogProb i ) { assert(false); return *this; }   // no support for addition in log mode!
+  LogProb& operator*= ( const LogProb i ) { set(sumLogs(toInt(),i.toInt())); return *this; }
+  LogProb& operator-= ( const LogProb i ) { assert(false); return *this; }   // no support for subtraction in log mode!
+  LogProb& operator/= ( const LogProb i ) { set(diffLogs(toInt(),i.toInt())); return *this; }
+  // Extraction methods...
+  bool    operator==( const LogProb i ) const { return(i.toInt()==toInt()); }
+  bool    operator!=( const LogProb i ) const { return(i.toInt()!=toInt()); }
+  LogProb operator+ ( const LogProb i ) const { assert(false); return *this; }   // no support for addition in log mode!
+  LogProb operator- ( const LogProb i ) const { assert(false); return *this; }   // no support for subtraction in log mode!
+  LogProb operator* ( const LogProb i ) const { return LogProb(sumLogs(toInt(),i.toInt())); }
+  LogProb operator/ ( const LogProb i ) const { return LogProb(diffLogs(toInt(),i.toInt())); }
+  // Back to the linear domain: exp(stored/100).
+  Prob   toProb()   const { return exp(double(toInt())/100.0);  }
+  double toDouble() const { return toProb().toDouble(); }
+//  operator double() const { return exp(toInt()/100.0); }
+  // Output writes the stored integer, not the linear-domain probability.
+  friend ostream& operator<< ( ostream& os, const LogProb& lp ) { return os<<lp.toInt(); }
+  friend String&  operator<< ( String& str, const LogProb& lp ) { return str<<lp.toInt(); }
+  // Input, step one: pair the input position with the destination object.
+  friend pair<StringInput,LogProb*> operator>> ( StringInput si, LogProb& n ) { return pair<StringInput,LogProb*>(si,&n); }
+  // Input, step two: read a linear-domain double followed by the delimiter and store its scaled log.
+  friend StringInput                operator>> ( pair<StringInput,LogProb*> si_n, const char* psDlm ) {
+    double d=0.0; StringInput si=si_n.first>>d>>psDlm; *si_n.second=LogProb(d); return si; }
+};
+#endif /* _NL_PROB__ */

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-racpt.h ADDED Viewed

	@@ -0,0 +1,332 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_RACPT__
+#define _NL_RACPT__
+// Hash-backed table from a key K to a value P, with dump / merge helpers and a text reader.
+template<class K, class P>
+class GenericRACPTModel : public SimpleHash<K,P> {
+ private:
+  typedef SimpleHash<K,P> HKP;
+//  typedef typename SimpleHash<Y,P>::const_iterator IYP;
+  //HKYP h;
+  // Keeps applying >>" " to si for as long as the result is non-NULL and returns the last such position.
+  static StringInput skipBlanks ( StringInput si ) {
+    for ( StringInput siNext=si>>" "; siNext!=NULL; siNext=si>>" " ) si=siNext;
+    return si;
+  }
+ public:
+  //typedef Y RVType;
+  //typedef BaseIterVal<std::pair<IYP,IYP>,Y> IterVal;
+  //typedef typename HKYP::const_iterator const_key_iterator;
+  // True if the underlying hash reports that it contains k.
+  bool contains ( const K& k ) const {
+    return HKP::contains(k);
+  }
+/*
+  P getProb ( const IterVal& ikyp, const K& k ) const {
+    if ( ikyp.iter.first == ikyp.iter.second ) { cerr<<"ERROR: no iterator to fix probability: "<<k<<endl; return P(); }
+    return ( ikyp.iter.first->second );
+  }
+*/
+  // Value access, forwarded to the underlying hash's get (read) and set (writable reference).
+  P getProb ( const K& k ) const {
+    return HKP::get(k);
+  }
+  P& setProb ( const K& k ) {
+    return HKP::set(k);
+  }
+/*
+  void normalize ( ) {
+    for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) {
+      K k=ik->first;
+      P p=P();
+      IterVal y;
+      for(bool by=setFirst(y,k); by; by=setNext(y,k))
+        p+=getProb(y,k);
+      if (p!=P())
+        for(bool by=setFirst(y,k); by; by=setNext(y,k))
+          setProb(y,k)/=p;
+    }
+  }
+*/
+/*
+   void transmit ( int tSockfd, const char* psId ) const {
+    for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) {
+      K k=ik->first;
+      IterVal y;
+      // For each non-zero probability in model...
+      for ( bool b=setFirst(y,k); b; b=setNext(y,k) ) {
+        //if ( getProb(y,k) != P() ) {
+          String str(1000);
+          str<<psId<<" "<<k<<" : "<<y<<" = "<<getProb(y,k).toDouble()<<"\n";
+          if ( send(tSockfd,str.c_array(),str.size()-1,MSG_EOR) != int(str.size()-1) )
+            {cerr<<"ERROR writing to socket\n";exit(0);}
+        //}
+      }
+    }
+  }
+*/
+  // Writes one "<psId> <key> = <value>" line per stored entry to os.
+  void dump ( ostream& os, const char* psId ) const {
+    typename HKP::const_iterator ik=HKP::begin();
+    while ( ik!=HKP::end() ) {
+      K k=ik->first;
+      os << psId<<" "<<k<<" = "<<getProb(k).toDouble()<<endl;
+//      IterVal y;
+//      for ( bool b=setFirst(y,k); b; b=setNext(y,k) )
+//        os<<psId<<" "<<k<<" : "<<y<<" = "<<getProb(y,k).toDouble()<<"\n";
+      ik++;
+    }
+  }
+  // Copies every entry of m into this table, overwriting entries that share a key.
+  void subsume ( GenericRACPTModel<K,P>& m ) {
+    typename HKP::const_iterator ik=m.HKP::begin();
+    while ( ik!=m.HKP::end() ) {
+      K k=ik->first;
+      setProb(k) = m.getProb(k);
+//      IterVal y;
+//      for ( bool b=m.setFirst(y,k); b; b=m.setNext(y,k) )
+//        setProb(y,k) = m.getProb(y,k);
+      ik++;
+    }
+  }
+  // Empties the table by forwarding to the underlying hash.
+  void clear ( ) { HKP::clear(); }
+  // Input, step one: pair the input position with the destination table.
+  friend pair<StringInput,GenericRACPTModel<K,P>*> operator>> ( StringInput si, GenericRACPTModel<K,P>& m ) {
+    return pair<StringInput,GenericRACPTModel<K,P>*>(si,&m); }
+  // Input, step two: reads "<key> = <value>" followed by delimiter psD into the destination table.
+  // The first ':' in the input buffer is overwritten with a blank (the buffer is modified in place), so
+  // that everything before the '=' is read as the key.
+  friend StringInput operator>> ( pair<StringInput,GenericRACPTModel<K,P>*> delimbuff, const char* psD ) {
+    K k;
+    GenericRACPTModel<K,P>& m = *delimbuff.second;
+    StringInput si = delimbuff.first;
+    if ( si==NULL ) return si;
+    // Kill the colon since we're treating the whole thing as the condition
+    char* const psLine  = si.c_str();
+    char* const psColon = strchr(psLine, ':');
+    if ( psColon!=NULL ) *psColon = ' ';
+    si = psLine;
+    si = GenericRACPTModel<K,P>::skipBlanks(si);
+    si = si>>k>>" ";
+    si = GenericRACPTModel<K,P>::skipBlanks(si);
+    si = si>>"= ";
+    si = GenericRACPTModel<K,P>::skipBlanks(si);
+    if ( si==NULL ) return si;
+    return si>>m.setProb(k)>>psD;
+  }
+};
+// One-variable front end for GenericRACPTModel: wraps y in a MapKey1D<Y> before each lookup.
+template<class Y, class P>
+class RandAccCPT1DModel : public GenericRACPTModel<MapKey1D<Y>,P> {
+ private:
+  typedef MapKey1D<Y>                   RACPTKey;
+  typedef GenericRACPTModel<RACPTKey,P> RACPTBase;
+ public:
+//  typedef typename GenericCPTModel<Y,MapKey1D<Unit>,P>::IterVal IterVal;
+  // True if the table contains the key built from y.
+  bool contains ( const Y& y ) const {
+    return RACPTBase::contains ( RACPTKey(y) );
+  }
+/*
+  P getProb ( const IterVal& ixyp ) const {
+    return GenericCPTModel<MapKey1D<Y>,P>::getProb ( ixyp, MapKey1D<Y>(Y()) );
+  }
+*/
+  // Value stored under the key built from y.
+  P getProb ( const Y& y ) const {
+    return RACPTBase::getProb ( RACPTKey(y) );
+  }
+/*
+P& setProb ( const Y& y ) {
+    cerr << "setProb called on racpt1d" << endl;
+    return GenericRACPTModel<MapKey1D<Y>,P>::setProb ( MapKey1D<Y>(y) );
+  }
+*/
+  /*
+  bool readFields ( Array<char*>& aps ) {
+    if ( 3==aps.size() ) {
+      GenericRACPTModel<MapKey1D<Y>,P>::setProb ( MapKey1D<Y>(aps[1]) ) = atof(aps[2]);
+      return true;
+    }
+    return false;
+  }
+  */
+};
+////////////////////
+// Two-variable front end for GenericRACPTModel: the key is MapKey2D<X1,Y>, built as (x1,y).
+template<class Y, class X1, class P>
+class RandAccCPT2DModel : public GenericRACPTModel<MapKey2D<X1,Y>,P> {
+ private:
+  typedef MapKey2D<X1,Y>                RACPTKey;
+  typedef GenericRACPTModel<RACPTKey,P> RACPTBase;
+ public:
+  // This stuff only for deterministic 'Determ' models...
+//  typedef X1 Dep1Type;
+//  typedef P ProbType;
+//  MapKey1D<X1> condKey;
+  // True if the table contains the key built from (x1,y).
+  bool contains ( const Y& y, const X1& x1 ) const {
+//    MapKey2D<X1,Y> temp = MapKey2D<X1,Y>(x1,y);
+    return RACPTBase::contains ( RACPTKey(x1,y) );
+  }
+  // Value stored under the key built from (x1,y).
+  P getProb ( const Y& y, const X1& x1 ) const {
+    return RACPTBase::getProb ( RACPTKey(x1,y) );
+  }
+/*
+  P& setProb ( const Y& y, const X1& x1 ) {
+    cerr << "setProb called on racpt2d" << endl;
+    return GenericRACPTModel<MapKey2D<Y,X1>,P>::setProb ( MapKey2D<Y,X1>(y,x1) );
+  }
+*/
+};
+////////////////////
+// Three-variable front end for GenericRACPTModel: the key is MapKey3D<X1,X2,Y>, built as (x1,x2,y).
+template<class Y, class X1, class X2, class P>
+class RandAccCPT3DModel : public GenericRACPTModel<MapKey3D<X1,X2,Y>,P> {
+ private:
+  typedef MapKey3D<X1,X2,Y>             RACPTKey;
+  typedef GenericRACPTModel<RACPTKey,P> RACPTBase;
+ public:
+  // True if the table contains the key built from (x1,x2,y).
+  bool contains ( const Y& y, const X1& x1, const X2& x2 ) const {
+    return RACPTBase::contains ( RACPTKey(x1,x2,y) );
+  }
+  // Value stored under the key built from (x1,x2,y).
+  P getProb ( const Y& y, const X1& x1, const X2& x2 ) const {
+    return RACPTBase::getProb ( RACPTKey(x1,x2,y) );
+  }
+/*
+  P& setProb ( const Y& y, const X1& x1, const X2& x2 ) {
+    return GenericRACPTModel<MapKey3D<X1,X2,Y>,P>::setProb (  MapKey3D<Y,X1,X2>(x1,x2,y) );
+  }
+*/
+};
+/*
+////////////////////
+template<class Y, class X1, class X2, class X3, class P>
+class CPT4DModel : public GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P> {
+ public:
+  typedef typename GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::IterVal IterVal;
+  bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setFirst ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setNext ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::contains ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  bool contains ( const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::contains ( MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::getProb ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::getProb ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) {
+    return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setProb ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
+  }
+  bool readFields ( Array<char*>& aps ) {
+    if ( 6==aps.size() ) {
+      GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setProb ( Y(aps[4]), MapKey3D<X1,X2,X3>(aps[1],aps[2],aps[3]) ) = atof(aps[5]);
+      return true;
+    }
+    return false;
+  }
+};
+////////////////////
+template<class Y, class X1, class X2, class X3, class X4, class P>
+class CPT5DModel : public GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P> {
+ public:
+  typedef typename GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::IterVal IterVal;
+  bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setFirst ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setNext ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::contains ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::contains ( MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::getProb ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::getProb ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) {
+    return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setProb ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
+  }
+  bool readFields ( Array<char*>& aps ) {
+    if ( 7==aps.size() ) {
+      GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setProb ( Y(aps[5]), MapKey4D<X1,X2,X3,X4>(aps[1],aps[2],aps[3],aps[4]) ) = atof(aps[6]);
+      return true;
+    }
+    return false;
+  }
+};
+////////////////////
+template<class Y, class X1, class X2, class X3, class X4, class X5, class P>
+class RACPT6DModel : public GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P> {
+ public:
+  typedef typename GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::IterVal IterVal;
+  bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setFirst ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setNext ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::contains ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::contains ( MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::getProb ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::getProb ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) {
+    return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setProb ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
+  }
+  bool readFields ( Array<char*>& aps ) {
+    if ( 8==aps.size() ) {
+      GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setProb ( Y(aps[6]), MapKey5D<X1,X2,X3,X4,X5>(aps[1],aps[2],aps[3],aps[4],aps[5]) ) = atof(aps[7]);
+      return true;
+    }
+    return false;
+  }
+};
+*/
+#endif //_NL_RACPT__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-randvar.h ADDED Viewed

	@@ -0,0 +1,593 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_RAND_VAR__
+#define _NL_RAND_VAR__
+#include <math.h>
+#include <string>
+#include "nl-string.h"
+#include "nl-safeids.h"
+#include "nl-stringindex.h"
+#include "nl-prob.h"
+#include "nl-hash.h"
+////////////////////////////////////////////////////////////////////////////////
+// Three-member counterpart of pair: holds one value each of types A, B and C.
+template <class A, class B, class C>
+class trip {
+ public:
+  // Data members...
+  A first;
+  B second;
+  C third;
+  // Constructor / destructor methods...
+  // Default constructor: members are default-initialized.
+  trip ( ) { }
+  // Member-wise constructor.  Arguments are taken by const reference so that
+  // const objects and temporaries can be passed as well as plain lvalues.
+  trip ( const A& a, const B& b, const C& c ) : first(a), second(b), third(c) { }
+  // Input / output methods...
+  // Writes the three members separated by commas, with no enclosing delimiters.
+  friend ostream& operator<< ( ostream& os, const trip<A,B,C>& t ) { return os<<t.first<<","<<t.second<<","<<t.third; }
+};
+// Four-member counterpart of pair: holds one value each of types A, B, C and D.
+template <class A, class B, class C, class D>
+class quad {
+ public:
+  // Data members...
+  A first;
+  B second;
+  C third;
+  D fourth;
+  // Constructor / destructor methods...
+  // Default constructor: members are default-initialized.
+  quad ( ) { }
+  // Member-wise constructor.  Arguments are taken by const reference so that
+  // const objects and temporaries can be passed as well as plain lvalues.
+  quad ( const A& a, const B& b, const C& c, const D& d ) : first(a), second(b), third(c), fourth(d) { }
+  // Input / output methods...
+  // Writes the four members separated by commas, with no enclosing delimiters.
+  friend ostream& operator<< ( ostream& os, const quad<A,B,C,D>& q ) { return os<<q.first<<","<<q.second<<","<<q.third<<","<<q.fourth; }
+};
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+//
+//  DiscreteDomainRV template -- creates RV with a distinct set of values for domain T (unique class)
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////
+// String table (StringIndex) tagged with the value type T of the random variables that index into it.
+template <class T>
+class DiscreteDomain : public StringIndex {
+ public:
+  typedef T ValType;
+  // Size bound supplied at construction; this class only stores it.
+  int MAX_SIZE ;
+  // Default constructor.  MAX_SIZE is set to 0 so that it never holds an indeterminate value.
+  // NOTE(review): no use of MAX_SIZE is visible here -- confirm 0 is an acceptable default.
+  DiscreteDomain ( )       : StringIndex(), MAX_SIZE(0)  { }
+  // Constructs an empty table and records i as MAX_SIZE.
+  DiscreteDomain ( int i ) : StringIndex(), MAX_SIZE(i)  { }
+  // Registers ps with the underlying StringIndex and returns its index.
+  // The assertion checks that the index survives conversion to T, i.e. that T can represent it.
+  int addIndex ( const char* ps ) { int i=StringIndex::addIndex(ps); assert(i==T(i)); return i; }
+};
+////////////////////////////////////////////////////////////
+// Random variable whose value is an index into the string table `domain'.
+// The index is stored through the Id<T> base; constructing from a string registers
+// that string in `domain' (via addIndex) and stores the index that comes back.
+template <class T, DiscreteDomain<T>& domain>
+class DiscreteDomainRV : public Id<T> {
+ private:
+  // Scratch buffer used by the delimited operator>> below.  It is static, so it is
+  // shared by every instance of a given specialization.
+  static String strTemp;
+ public:
+  typedef DiscreteDomainRV<T,domain> BaseType;
+  static const int NUM_VARS = 1;
+  ////////////////////
+  // Array of (value,P) pairs...
+  template<class P>
+  class ArrayDistrib : public Array<pair<DiscreteDomainRV<T,domain>,P> > {
+  };
+  ////////////////////
+  // Cursor into an ArrayDistrib: `first' refers to the array, `second' is the current position...
+  template<class P>
+  class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
+   public:
+    static const int NUM_ITERS = NUM_VARS;
+    // Converts to the random-variable value stored at the current position.
+    operator DiscreteDomainRV<T,domain>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+    //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+    // True once the position has reached or passed the size of the array.
+    bool              end        ( ) const { return ( ArrayIterator<P>::second >= ArrayIterator<P>::first.getRef().getSize() ); }
+    // Advances the position by one.
+    ArrayIterator<P>& operator++ ( )       { ++ArrayIterator<P>::second; return *this; }
+  };
+  // Static extraction methods...
+  static const DiscreteDomain<T>& getDomain ( ) { return domain; }
+  // Constructor / destructor methods...
+  DiscreteDomainRV ( )                 { Id<T>::set(0); }
+  DiscreteDomainRV ( int i )           { Id<T>::set(i); }
+  // Looks ps up in (or adds it to) the domain and stores its index; ps must not be NULL.
+  DiscreteDomainRV ( const char* ps )  { assert(ps!=NULL); Id<T>::set(domain.addIndex(ps)); }
+  // Specification methods...
+  // Takes the value the iterator currently refers to (through the iterator's conversion operator).
+  template<class P>
+  DiscreteDomainRV<T,domain>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
+  // Resets to index 0; returns whether that index is valid.
+  bool setFirst ( )  { Id<T>::set(0); return isValid(); }
+  // Steps to the next index; on running past the domain, wraps back to index 0 and returns false.
+  bool setNext  ( )  { Id<T>::setNext(); if (!isValid()){Id<T>::set(0); return false;} return true; }
+  // Extraction methods...
+  // True if the stored index is below the current size of the domain.
+  bool   isValid   ( )                   const { return *this<domain.getSize(); } //return (this->Id<T>::operator<(domain.getSize())); }
+  int    getIndex  ( )                   const { return Id<T>::toInt(); }  // DO NOT DELETE THIS METHOD!!!!!!!!!!
+  // String that the domain associates with the stored index.
+  string getString ( )                   const { return domain.getString(Id<T>::toInt()); }
+  // Input / output methods...
+  friend ostream& operator<< ( ostream& os, const DiscreteDomainRV<T,domain>& rv ) { return  os<<rv.getString(); }
+  friend String&  operator<< ( String& str, const DiscreteDomainRV<T,domain>& rv ) { return str<<rv.getString(); }
+  // First half of `input >> rv >> delimiter': just bundles the input position with the target variable...
+  friend pair<StringInput,DiscreteDomainRV<T,domain>*> operator>> ( const StringInput ps, DiscreteDomainRV<T,domain>& rv ) { return pair<StringInput,DiscreteDomainRV<T,domain>*>(ps,&rv); }
+  // Second half: reads the text up to the first occurrence of psDlm into the target variable and
+  // returns the input position just past that delimiter.  A NULL input is passed through unchanged;
+  // NULL is returned if the delimiter does not occur.  An empty delimiter consumes the whole input.
+  friend StringInput operator>> ( pair<StringInput,DiscreteDomainRV<T,domain>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    ////assert(*delimbuff.second<domain.getSize());
+    StringInput psIn = delimbuff.first;
+    // Empty delimiter: everything that remains is the value...
+    if(psDlm[0]=='\0') { *delimbuff.second=psIn.c_str(); return psIn+strlen(psIn.c_str()); }
+    // Locate the first full occurrence of the delimiter.  strstr is used rather than a hand-rolled
+    // matcher so that a partial match followed by a real one (e.g. "ab" in "aab") is still found...
+    const char* psBeg = psIn.c_str();
+    const char* psHit = strstr(psBeg,psDlm);
+    if(psHit==NULL) return NULL; //psIn;
+    // Copy the text preceding the delimiter into the scratch buffer and assign it as a string...
+    int n = int(psHit-psBeg);
+    for(int i=0;i<n;i++) strTemp[i]=psIn[i];
+    strTemp[n]='\0';
+    /*delimbuff.second->set(domain.addIndex(psIn.c_str()));*/
+    *delimbuff.second=strTemp.c_array();
+    return psIn+n+int(strlen(psDlm));
+  }
+};
+template <class T, DiscreteDomain<T>& domain>
+String DiscreteDomainRV<T,domain>::strTemp ( 100 );
+/* DON'T COMMENT BACK IN!!! THIS HAS BEEN MOVED TO nl-refrv.h!!!!!!
+////////////////////////////////////////////////////////////
+template <class T>
+class RefRV : public Id<const T*> {
+ public:
+  typedef RefRV<T> BaseType;
+  static const int NUM_VARS = 1;
+  static const T   DUMMY;
+  ////////////////////
+  template<class P>
+  class ArrayDistrib : public Array<pair<RefRV<T>,P> > {
+  };
+  ////////////////////
+  template<class P>
+  class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
+   public:
+    static const int NUM_ITERS = NUM_VARS;
+    operator RefRV<T>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+    //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+  };
+  // Constructor / destructor methods...
+  RefRV ( )                 { Id<const T*>::set(NULL); }
+  RefRV ( const T& t )      { Id<const T*>::set(&t);   }
+  // Specification methods...
+  template<class P>
+  RefRV<T>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
+  // Extraction methods...
+  const T& getRef ( ) const { return (Id<const T*>::toInt()==NULL) ? DUMMY : *(static_cast<const T*>(Id<const T*>::toInt())); }
+  // Input / output methods..
+  friend ostream& operator<< ( ostream& os, const RefRV<T>& rv ) { return os <<&rv.getRef(); }  //{ return  os<<rv.getRef(); }
+  friend String&  operator<< ( String& str, const RefRV<T>& rv ) { return str<<"addr"<<(long int)(void*)&rv.getRef(); }  //{ return str<<rv.getRef(); }
+  friend pair<StringInput,RefRV<T>*> operator>> ( const StringInput ps, RefRV<T>& rv ) { return pair<StringInput,RefRV<T>*>(ps,&rv); }
+  friend StringInput operator>> ( pair<StringInput,RefRV<T>*> delimbuff, const char* psDlm ) {
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    return NULL; //psIn;
+  }
+};
+template <class T> const T RefRV<T>::DUMMY;
+*/
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+//
+//  Joint2DRV
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////
+// Joint random variable made of two component variables, `first' and `second'.
+template<class V1,class V2>
+class Joint2DRV {
+ public:
+  // Data members...
+  V1 first;
+  V2 second;
+  // Constructor / destructor methods...
+  Joint2DRV ( ) { }
+  Joint2DRV ( const V1& v1, const V2& v2 ) {
+    first  = v1;
+    second = v2;
+  }
+  // Extraction methods...
+  // Hash key: the first component's key rotated left by 3, xor'ed with the second component's key.
+  size_t getHashKey ( ) const {
+    size_t key = rotLeft(first.getHashKey(),3);
+    key ^= second.getHashKey();
+    return key;
+  }
+  // Lexicographic order: compare `first', and only on a tie compare `second'.
+  bool operator< ( const Joint2DRV<V1,V2>& rhs ) const {
+    if ( first<rhs.first ) return true;
+    return ( first==rhs.first && second<rhs.second );
+  }
+  // Equal when both components are equal.
+  bool operator== ( const Joint2DRV<V1,V2>& rhs ) const {
+    return ( first==rhs.first && second==rhs.second );
+  }
+  // Negation of operator==.
+  bool operator!= ( const Joint2DRV<V1,V2>& rhs ) const {
+    return !( *this==rhs );
+  }
+};
+////////////////////////////////////////////////////////////
+// Joint2DRV with a textual form:  SD1 <first> SD2 <second> SD3,  where SD1..SD3 are delimiter strings
+// supplied as template arguments.
+template<char* SD1,class V1,char* SD2,class V2,char* SD3>
+class DelimitedJoint2DRV : public Joint2DRV<V1,V2> {
+ public:
+  static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS;
+  ////////////////////
+  // Pair of component iterators, printed with the same delimiters as the variable itself...
+  template<class P>
+  class ArrayIterator : public pair<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P> > {
+   public:
+    // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
+    static const int NUM_ITERS = NUM_VARS;
+//    DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& set ( DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) const { first.set(rv.first=first); rv.second=second; return rv; }
+    friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
+      return os<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
+  };
+  // Constructor / destructor methods...
+  DelimitedJoint2DRV ( )                             : Joint2DRV<V1,V2>()      { }
+  DelimitedJoint2DRV ( const V1& v1, const V2& v2 )  : Joint2DRV<V1,V2>(v1,v2) { }
+  // Parses the delimited form directly from the caller's buffer, reading to the end of the string.
+  DelimitedJoint2DRV ( char* ps )                    : Joint2DRV<V1,V2>()      { ps>>*this>>"\0"; }
+  // Parses the delimited form from a private writable copy of ps.  The copy is owned by a local
+  // string, so it is released when the constructor returns instead of being leaked.
+  // NOTE(review): assumes the component parsers do not keep pointers into the input buffer -- confirm.
+  DelimitedJoint2DRV ( const char* ps )              : Joint2DRV<V1,V2>()      { string sCopy(ps); char* psCopy=&sCopy[0]; psCopy>>*this>>"\0"; }
+  // Specification methods...
+  // Sets each component from the corresponding component iterator.
+  template<class P>
+  DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& setVal ( const ArrayIterator<P>& it ) {
+    Joint2DRV<V1,V2>::first.setVal(it.first); Joint2DRV<V1,V2>::second.setVal(it.second); return *this; }
+  // Extraction methods...
+  bool operator==(const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& vv) const { return Joint2DRV<V1,V2>::operator==(vv); }
+  bool operator< (const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& vv) const { return Joint2DRV<V1,V2>::operator<(vv); }
+  // Input / output methods...
+  friend ostream& operator<< ( ostream& os, const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return  os<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
+  friend String&  operator<< ( String& str, const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
+  // Reads SD1, first, SD2, second, (SD3,) and then the trailing delimiter psDlm from the stream.
+  friend IStream operator>> ( pair<IStream,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> is_x, const char* psDlm ) {
+    IStream&                               is =  is_x.first;
+    DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& x  = *is_x.second;
+    // Propagate fail...
+    if ( IStream()==is ) return is;
+    // Use last delimiter only if not empty (otherwise it will immediately trivially match)...
+    return ( (SD3[0]=='\0') ? is>>SD1>>x.first>>SD2>>x.second>>psDlm
+                            : is>>SD1>>x.first>>SD2>>x.second>>SD3>>psDlm );
+  }
+  // OBSOLETE!
+  // StringInput flavour of the same two-step read: the first operator bundles input and target,
+  // the second does the parsing once the trailing delimiter is known.
+  friend pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> operator>> ( StringInput ps, DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*>(ps,&rv); }
+  friend StringInput    operator>> ( pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    // An empty SD3 is skipped, since an empty delimiter would match immediately...
+    return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>psDlm
+                            : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>psDlm );
+  }
+};
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+//
+//  Joint3DRV
+//
+////////////////////////////////////////////////////////////////////////////////
+// Joint random variable made of three component variables: `first', `second' and `third'.
+template<class V1,class V2,class V3>
+class Joint3DRV {
+ public:
+  V1 first;
+  V2 second;
+  V3 third;
+  // Constructor / destructor methods...
+  Joint3DRV ( )                                          { }
+  Joint3DRV ( const V1& v1, const V2& v2, const V3& v3 ) { first=v1; second=v2; third=v3; }
+  // Extraction methods...
+  // Hash key: each component's key is xor'ed in after rotating the running key left by 3,
+  // so the key depends on the order of the components.
+  size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey(); k=rotLeft(k,3); k^=third.getHashKey();
+                                return k; }
+  // Lexicographic order over (first,second,third).  DelimitedJoint3DRV::operator< forwards to this,
+  // so it must exist for that operator to compile when used.
+  bool      operator< ( const Joint3DRV<V1,V2,V3>& j )  const { return ( (first<j.first) ||
+                                                                         (first==j.first && second<j.second) ||
+                                                                         (first==j.first && second==j.second && third<j.third) ); }
+  bool      operator== ( const Joint3DRV<V1,V2,V3>& j ) const { return ( first==j.first && second==j.second && third==j.third ); }
+  bool      operator!= ( const Joint3DRV<V1,V2,V3>& j ) const { return ( !(first==j.first && second==j.second && third==j.third) ); }
+};
+////////////////////////////////////////////////////////////
+// Joint3DRV with a textual form:  SD1 <first> SD2 <second> SD3 <third> SD4,  where SD1..SD4 are
+// delimiter strings supplied as template arguments.
+template<char* SD1,class V1,char* SD2,class V2,char* SD3,class V3,char* SD4>
+class DelimitedJoint3DRV : public Joint3DRV<V1,V2,V3> {
+ public:
+  static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS + V3::NUM_VARS;
+  ////////////////////
+  // Triple of component iterators, printed with the same delimiters as the variable itself...
+  template<class P>
+  class ArrayIterator : public trip<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P>, typename V3::template ArrayIterator<P> > {
+   public:
+    // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
+    static const int NUM_ITERS = NUM_VARS;
+    friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
+      return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
+  };
+  // Constructor / destructor methods...
+  DelimitedJoint3DRV ( )                                           : Joint3DRV<V1,V2,V3>()         { }
+  DelimitedJoint3DRV ( const V1& v1, const V2& v2, const V3& v3 )  : Joint3DRV<V1,V2,V3>(v1,v2,v3) { }
+  // Parses the delimited form directly from the caller's buffer, reading to the end of the string.
+  DelimitedJoint3DRV ( char* ps )                                  : Joint3DRV<V1,V2,V3>()         { ps>>*this>>"\0"; }
+  // Parses the delimited form from a private writable copy of ps.  The copy is owned by a local
+  // string, so it is released when the constructor returns instead of being leaked.
+  // NOTE(review): assumes the component parsers do not keep pointers into the input buffer -- confirm.
+  DelimitedJoint3DRV ( const char* ps )                            : Joint3DRV<V1,V2,V3>()         { string sCopy(ps); char* psCopy=&sCopy[0]; psCopy>>*this>>"\0"; }
+  // Specification methods...
+  // Sets each component from the corresponding component iterator.
+  template<class P>
+  DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& setVal ( const ArrayIterator<P>& it ) {
+    Joint3DRV<V1,V2,V3>::first.setVal(it.first); Joint3DRV<V1,V2,V3>::second.setVal(it.second); Joint3DRV<V1,V2,V3>::third.setVal(it.third); return *this; }
+  // Extraction methods...
+  bool operator==(const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& vvv) const { return Joint3DRV<V1,V2,V3>::operator==(vvv); }
+  bool operator< (const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& vvv) const { return Joint3DRV<V1,V2,V3>::operator< (vvv); }
+  // Input / output methods...
+  friend ostream& operator<< ( ostream& os, const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) { return  os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
+  friend String&  operator<< ( String& str, const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
+  // Two-step read from a StringInput: the first operator bundles input and target, the second
+  // does the parsing once the trailing delimiter psDlm is known.
+  friend pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*> operator>> ( StringInput ps, DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) {
+    return pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*>(ps,&rv); }
+  friend StringInput    operator>> ( pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    // An empty SD4 is skipped, since an empty delimiter would match immediately...
+    return ( (SD4[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>psDlm
+                            : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>psDlm );
+  }
+};
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+//
+//  Joint4DRV
+//
+////////////////////////////////////////////////////////////////////////////////
+// Joint random variable made of four component variables: `first', `second', `third' and `fourth'.
+template<class V1,class V2,class V3, class V4>
+class Joint4DRV {
+ public:
+  V1 first;
+  V2 second;
+  V3 third;
+  V4 fourth;
+  // Constructor / destructor methods...
+  Joint4DRV ( )                                          { }
+  Joint4DRV ( const V1& v1, const V2& v2, const V3& v3, const V4& v4 ) { first=v1; second=v2; third=v3; fourth=v4;}
+  // Extraction methods...
+  // Hash key: each component's key is xor'ed in after rotating the running key left by 3.
+  // The rotation before `fourth' matters: without it `third' and `fourth' would be combined
+  // symmetrically, so swapping them would give the same key and equal values would cancel out.
+  size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey(); k=rotLeft(k,3); k^=third.getHashKey(); k=rotLeft(k,3); k^=fourth.getHashKey();
+                                return k; }
+  // Lexicographic order over (first,second,third,fourth).  DelimitedJoint4DRV::operator< forwards
+  // to this, so it must exist for that operator to compile when used.
+  bool      operator< ( const Joint4DRV<V1,V2,V3,V4>& j )  const { return ( (first<j.first) ||
+                                                                            (first==j.first && second<j.second) ||
+                                                                            (first==j.first && second==j.second && third<j.third) ||
+                                                                            (first==j.first && second==j.second && third==j.third && fourth<j.fourth) ); }
+  bool      operator== ( const Joint4DRV<V1,V2,V3,V4>& j ) const { return ( first==j.first && second==j.second && third==j.third && fourth==j.fourth ); }
+  bool      operator!= ( const Joint4DRV<V1,V2,V3,V4>& j ) const { return ( !(first==j.first && second==j.second && third==j.third && fourth==j.fourth) ); }
+};
+////////////////////////////////////////////////////////////
+// Joint4DRV with a textual form:  SD1 <first> SD2 <second> SD3 <third> SD4 <fourth> SD5,  where
+// SD1..SD5 are delimiter strings supplied as template arguments.
+template<char* SD1,class V1,char* SD2,class V2,char* SD3,class V3,char* SD4,class V4, char* SD5>
+class DelimitedJoint4DRV : public Joint4DRV<V1,V2,V3,V4> {
+ public:
+  static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS + V3::NUM_VARS+ V4::NUM_VARS;
+  ////////////////////
+  // Quadruple of component iterators, printed with the same delimiters as the variable itself...
+  template<class P>
+  class ArrayIterator : public quad<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P>, typename V3::template ArrayIterator<P> , typename V4::template ArrayIterator<P> > {
+   public:
+    // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
+    static const int NUM_ITERS = NUM_VARS;
+    friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
+      return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
+  };
+  // Constructor / destructor methods...
+  DelimitedJoint4DRV ( )                                           : Joint4DRV<V1,V2,V3,V4>()         { }
+  DelimitedJoint4DRV ( const V1& v1, const V2& v2, const V3& v3, const V4& v4 )  : Joint4DRV<V1,V2,V3,V4>(v1,v2,v3,v4) { }
+  // Parses the delimited form directly from the caller's buffer, reading to the end of the string.
+  DelimitedJoint4DRV ( char* ps )                                  : Joint4DRV<V1,V2,V3,V4>()         { ps>>*this>>"\0"; }
+  // Parses the delimited form from a private writable copy of ps.  The copy is owned by a local
+  // string, so it is released when the constructor returns instead of being leaked.
+  // NOTE(review): assumes the component parsers do not keep pointers into the input buffer -- confirm.
+  DelimitedJoint4DRV ( const char* ps )                            : Joint4DRV<V1,V2,V3,V4>()         { string sCopy(ps); char* psCopy=&sCopy[0]; psCopy>>*this>>"\0"; }
+  // Specification methods...
+  // Sets each component from the corresponding component iterator.
+  template<class P>
+  DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& setVal ( const ArrayIterator<P>& it ) {
+    Joint4DRV<V1,V2,V3,V4>::first.setVal(it.first);
+    Joint4DRV<V1,V2,V3,V4>::second.setVal(it.second);
+    Joint4DRV<V1,V2,V3,V4>::third.setVal(it.third);
+    Joint4DRV<V1,V2,V3,V4>::fourth.setVal(it.fourth);
+    return *this;
+    }
+  // Extraction methods...
+  bool operator==(const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& vvvv) const { return Joint4DRV<V1,V2,V3,V4>::operator==(vvvv); }
+  bool operator< (const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& vvvv) const { return Joint4DRV<V1,V2,V3,V4>::operator< (vvvv); }
+  // Input / output methods...
+  friend ostream& operator<< ( ostream& os, const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) { return  os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
+  friend String&  operator<< ( String& str, const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
+  // Two-step read from a StringInput: the first operator bundles input and target, the second
+  // does the parsing once the trailing delimiter psDlm is known.
+  friend pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*> operator>> ( StringInput ps, DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) {
+    return pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*>(ps,&rv); }
+  friend StringInput    operator>> ( pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    // An empty SD5 is skipped, since an empty delimiter would match immediately...
+    return ( (SD5[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>delimbuff.second->fourth>>psDlm
+                            : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>delimbuff.second->fourth>>SD5>>psDlm );
+  }
+};
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  JointArrayRV<I,T>
+//
+////////////////////////////////////////////////////////////////////////////////
+// Joint random variable made of a fixed-size array of I component variables of type T.
+template <int I, class T>
+class JointArrayRV {
+ private:
+  // Data members...
+  T at[I];
+ public:
+  typedef T ElementType;
+  /*
+  // Constructor / destructor methods...
+  JointArrayRV ( )              { }
+  JointArrayRV ( const T& t )   { for(int i=0;i<I;i++) at[i]=t; }
+  */
+  // Static extraction methods...
+  static const int SIZE = I;
+  static const int getSize ( )       { return I; }
+  // Specification methods...
+  // Writable reference to element i; i must lie in [0,I).
+  T&         set       (int i)       { assert(0<=i); assert(i<I); return at[i]; }
+  // Extraction methods...
+  // Read-only reference to element i; i must lie in [0,I).
+  const T&   get       (int i) const { assert(0<=i); assert(i<I); return at[i]; }
+  // Lexicographic order: the first differing element decides; equal arrays are not less.
+  bool       operator< ( const JointArrayRV<I,T>& a ) const {
+    int i;
+    // The bound is tested before the elements so that at[I] is never read...
+    for ( i=0; i<I && at[i]==a.at[i]; i++ ) ;
+    return ( i<I && at[i]<a.at[i] ) ;
+  }
+  // Equal when all I elements are equal.
+  bool       operator== ( const JointArrayRV<I,T>& a ) const {
+    int i;
+    // The bound is tested before the elements so that at[I] is never read...
+    for ( i=0; i<I && at[i]==a.at[i]; i++ ) ;
+    return ( i==I ) ;
+  }
+  // Hash key: for each element in order, rotate the running key left by 3 and xor in the element's key.
+  size_t getHashKey   ( ) const { size_t k=0; for(int i=0;i<I;i++){k=rotLeft(k,3); k^=get(i).getHashKey(); } return k; }
+};
+////////////////////////////////////////////////////////////////////////////////
+// JointArrayRV whose textual form is its I elements separated by the delimiter string SD.
+template <int I, char* SD, class T>
+class DelimitedJointArrayRV : public JointArrayRV<I,T> {
+ public:
+  static const int NUM_VARS = T::NUM_VARS * I;
+  ////////////////////
+  // Array of I element iterators, printed SD-separated like the variable itself...
+  template<class P>
+  class ArrayIterator : public StaticSafeArray<I,typename T::template ArrayIterator<P> > {
+   public:
+    static const int NUM_ITERS = NUM_VARS;
+    // static const int NUM_ITERS = (typename T::template ArrayIterator<P>)::NUM_ITERS * I;
+    friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
+      for ( int n=0; n<I; n++ ) {
+        os<<((0==n)?"":SD)<<rv.get(n);
+      }
+      return os;
+    }
+  };
+  // Specification methods...
+  // Sets every element from the element iterator at the same position.
+  template<class P>
+  DelimitedJointArrayRV<I,SD,T>& setVal ( const ArrayIterator<P>& it ) {
+    for ( int n=0; n<I; n++ ) {
+      JointArrayRV<I,T>::set(n).setVal(it.get(n));
+    }
+    return *this;
+  }
+  // Extraction methods...
+  bool operator==(const DelimitedJointArrayRV<I,SD,T>& a) const { return JointArrayRV<I,T>::operator==(a); }
+  bool operator< (const DelimitedJointArrayRV<I,SD,T>& a) const { return JointArrayRV<I,T>::operator<(a); }
+  // Input / output methods...
+  // Both output operators write the elements in order, with SD between consecutive elements.
+  friend ostream& operator<< ( ostream& os, const DelimitedJointArrayRV<I,SD,T>& a ) {
+    for ( int n=0; n<I; n++ ) {
+      os<<((0==n)?"":SD)<<a.get(n);
+    }
+    return os;
+  }
+  friend String&  operator<< ( String& str, const DelimitedJointArrayRV<I,SD,T>& a ) {
+    for ( int n=0; n<I; n++ ) {
+      str<<((0==n)?"":SD)<<a.get(n);
+    }
+    return str;
+  }
+  // Reads the I elements from the stream: every element but the last is terminated by SD,
+  // the last one by the caller's delimiter psDlm.
+  friend IStream operator>> ( pair<IStream,DelimitedJointArrayRV<I,SD,T>*> is_x, const char* psDlm ) {
+    IStream&                       is =  is_x.first;
+    DelimitedJointArrayRV<I,SD,T>& x  = *is_x.second;
+    // Propagate fail...
+    if (IStream()==is) return IStream();
+    for ( int n=0; n<I; n++ ) {
+      const char* psSep = (n<I-1) ? SD : psDlm;
+      is = pair<IStream,T*>(is,&x.set(n))>>psSep;
+    }
+    return is;
+  }
+  // OBSOLETE!
+  // StringInput flavour of the same read: the first operator bundles input and target,
+  // the second parses the elements once the trailing delimiter is known.
+  friend pair<StringInput,DelimitedJointArrayRV<I,SD,T>*> operator>> ( StringInput ps, DelimitedJointArrayRV<I,SD,T>& a ) {
+    return pair<StringInput,DelimitedJointArrayRV<I,SD,T>*>(ps,&a);
+  }
+  friend StringInput operator>> ( pair<StringInput,DelimitedJointArrayRV<I,SD,T>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    StringInput psRest = delimbuff.first;
+    for ( int n=0; n<I; n++ ) {
+      const char* psSep = (n<I-1) ? SD : psDlm;
+      psRest = pair<StringInput,T*>(psRest,&delimbuff.second->set(n))>>psSep;
+    }
+    return psRest;
+  }
+};
+///////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//  History<N,T>
+//
+////////////////////////////////////////////////////////////////////////////////
+// Fixed-length history of N values of type T; position 0 is the most recently advanced-in value.
+template <int N,class T>
+class History {
+ private:
+  // Data members...
+  StaticSafeArray<N,T> at;
+  // Copies each element one position toward the back (the last one is overwritten);
+  // position 0 is left holding its old value for the caller to replace.
+  void shiftBack ( ) {
+    for ( int n=N-1; n>0; n-- ) {
+      at.set(n) = at.get(n-1);
+    }
+  }
+ public:
+  // Constructor / destructor methods...
+  History ( )                 { }
+  // Parses a ";"-separated list of N values from ps, reading to the end of the string.
+  History ( char* ps )        { ps>>*this>>"\0"; }
+  /*
+  History ( char* ps )        { read(ps); }
+  */
+//  History ( const string& s ) { read(s.c_str()); }
+  // Specification methods...
+  // Shifts the history back by one and stores t at position 0.
+  void advanceHistory(const T& t) { shiftBack(); at.set(0)=t; }
+  // Shifts the history back by one and returns a writable reference to position 0.
+  T&   advanceHistory()           { shiftBack(); return at.set(0); }
+  // Writable reference to the value i positions back.
+  T&   setBack(int i)             { return at.set(i); }
+  // Extraction methods...
+  // Read-only reference to the value i positions back; i must lie in [0,N).
+  const T& getBack(int i) const {
+    assert(i>=0);
+    assert(i<N);
+    return at.get(i);
+  }
+  // Input / output methods...
+  /*
+  void read ( char* ps, const ReaderContext& rc=ReaderContext() ) { char* psT; for(int i=0;i<N;i++){char* z=strtok_r((0==i)?ps:NULL,";",&psT); assert(z); at.set(i).read(z);} }
+  //at.set(i).read(strtok_r((0==i)?ps:NULL,";",&psT)); }
+  */
+  // Writes the N values from position 0 onward, separated by ";".
+  friend ostream& operator<< ( ostream& os, const History<N,T>& a ) {
+    for ( int n=0; n<N; n++ ) {
+      os<<((0==n)?"":";")<<a.getBack(n);
+    }
+    return os;
+  }
+  // Two-step read from a StringInput: the first operator bundles input and target...
+  friend pair<StringInput,History<N,T>*> operator>> ( StringInput ps, History<N,T>& a ) {
+    return pair<StringInput,History<N,T>*>(ps,&a);
+  }
+  // ...and the second parses the N values: all but the last are terminated by ";",
+  // the last one by the caller's delimiter psDlm.
+  friend StringInput    operator>> ( pair<StringInput,History<N,T>*> delimbuff, const char* psDlm ) {
+    // Propagate fail...
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    StringInput psRest = delimbuff.first;
+    for ( int n=0; n<N; n++ ) {
+      const char* psSep = (n<N-1) ? ";" : psDlm;
+      psRest = pair<StringInput,T*>(psRest,&delimbuff.second->setBack(n))>>psSep;
+    }
+    return psRest;
+  }
+  /*
+  void write ( FILE* pf ) const { for(int i=0;i<N;i++) {fprintf(pf,(0==i)?"":";"); at.get(i).write(pf);} }
+  */
+};
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+#endif //_NL_RAND_VAR__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-refrv.h ADDED Viewed

	@@ -0,0 +1,74 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////
+  //template <class T>
+template<class T, map<T,T>& domain>
+class RefRV : public Id<const T*> {
+ public:
+  typedef RefRV<T,domain> BaseType;
+  static const int NUM_VARS = 1;
+  static const T   DUMMY;
+  ////////////////////
+  template<class P>
+  class ArrayDistrib : public Array<pair<RefRV<T,domain>,P> > {
+  };
+  ////////////////////
+  template<class P>
+  class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
+   public:
+    static const int NUM_ITERS = NUM_VARS;
+    operator RefRV<T,domain>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+    //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
+  };
+  // Constructor / destructor methods...
+  RefRV ( )                 { Id<const T*>::set(NULL); }
+  //RefRV ( int i )           { Id<const T*>::set(i); }
+  //RefRV ( const T& t )      { Id<const T*>::set(&t);   }
+  RefRV ( const T& t )      { if(domain.find(t)==domain.end()) *(const_cast<T*>(Id<const T*>::set(&domain[t]).toInt())) = t;
+                              else Id<const T*>::set(&domain[t]); }
+  // Specification methods...
+  template<class P>
+  RefRV<T,domain>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
+  //T&       setRef ( )       { return Id<const T*>::setRef(); }
+  // Extraction methods...
+  const T& getRef ( ) const { return (Id<const T*>::toInt()==NULL) ? DUMMY : *(static_cast<const T*>(Id<const T*>::toInt())); }
+  static map<T,T>& setDomain ( ) { return domain; }
+  // Input / output methods..
+  friend ostream& operator<< ( ostream& os, const RefRV<T,domain>& rv ) { return os <<&rv.getRef(); }  //{ return  os<<rv.getRef(); }
+  friend String&  operator<< ( String& str, const RefRV<T,domain>& rv ) { return str<<"addr"<<(long int)(void*)&rv.getRef(); }  //{ return str<<rv.getRef(); }
+  friend pair<StringInput,RefRV<T,domain>*> operator>> ( const StringInput ps, RefRV<T,domain>& rv ) { return pair<StringInput,RefRV<T,domain>*>(ps,&rv); }
+  friend StringInput operator>> ( pair<StringInput,RefRV<T,domain>*> delimbuff, const char* psDlm ) {
+    if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
+    return NULL; //psIn;
+  }
+};
+template <class T, map<T,T>& domain> const T RefRV<T,domain>::DUMMY;

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-timer.h ADDED Viewed

	@@ -0,0 +1,52 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+#ifndef _NL_TIMER__
+#define _NL_TIMER__
+#include <sys/time.h>
+// Accumulating stopwatch built on gettimeofday().
+// Each pause() adds the wall-clock time since construction or the most recent
+// start() to a running total; elapsed() reports that total.  Calling pause()
+// twice without an intervening start() counts the first interval twice.
+class Timer {
+ private:
+  struct timeval kept;  // time accumulated so far; kept normalised: 0 <= tv_usec < 1000000
+  struct timeval beg;   // time of construction or of the most recent start()
+ public:
+  Timer ( ) { gettimeofday(&beg,NULL); kept.tv_sec=0; kept.tv_usec=0; }
+  // Marks the beginning of a new timed interval.
+  void start ( ) { gettimeofday(&beg,NULL); }
+  // Adds the interval since the last start() (or construction) to the total.
+  void pause ( ) {
+    struct timeval now; gettimeofday(&now,NULL);
+    kept.tv_sec  += now.tv_sec  - beg.tv_sec;
+    kept.tv_usec += now.tv_usec - beg.tv_usec;
+    // The microsecond difference lies strictly within +/-1000000, so a single
+    // carry or borrow restores the normalised form.
+    if      (kept.tv_usec >= 1000000) { kept.tv_sec += 1; kept.tv_usec -= 1000000; }
+    else if (kept.tv_usec <  0)       { kept.tv_sec -= 1; kept.tv_usec += 1000000; }
+  }
+  // Total accumulated time in milliseconds (intervals not yet pause()d are not included).
+  double elapsed ( ) const {  // in milliseconds.
+    return (double(kept.tv_sec)*1000.0 + double(kept.tv_usec)/1000.0);
+    //struct timeval end; gettimeofday(&end,NULL);
+    //double beg_time_s = (double) beg.tv_sec + (double) ((double)beg.tv_usec / 1000000.0);
+    //double end_time_s = (double) end.tv_sec + (double) ((double)end.tv_usec / 1000000.0);
+    //return ( (end_time_s - beg_time_s) * 1000.0 );
+  }
+};
+#endif //_NL_TIMER__

mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-tree.h ADDED Viewed

	@@ -0,0 +1,43 @@

+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
+//                                                                           //
+//    ModelBlocks is free software: you can redistribute it and/or modify    //
+//    it under the terms of the GNU General Public License as published by   //
+//    the Free Software Foundation, either version 3 of the License, or      //
+//    (at your option) any later version.                                    //
+//                                                                           //
+//    ModelBlocks is distributed in the hope that it will be useful,         //
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of         //
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          //
+//    GNU General Public License for more details.                           //
+//                                                                           //
+//    You should have received a copy of the GNU General Public License      //
+//    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   //
+//                                                                           //
+//    ModelBlocks developers designate this particular file as subject to    //
+//    the "Moses" exception as provided by ModelBlocks developers in         //
+//    the LICENSE file that accompanies this code.                           //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+template<class B, class T>
+class Tree : public T {
+ private:
+  // Data members...
+  SimpleHash<B,Tree<B,T>*> apt;
+  static const Tree<B,T> tDummy;
+ public:
+  // Constructor / destructor methods...
+  ~Tree ( )                  { for(typename SimpleHash<B,Tree<B,T>*>::iterator i=apt.begin(); i!=apt.end(); i++) delete i->second; }
+  Tree  ( )                  { }
+//  Tree  ( const Tree<T>& t ) { ptL = (t.ptL) ? new Tree<T>(*t.ptL) : NULL;
+//                               ptR = (t.ptR) ? new Tree<T>(*t.ptR) : NULL; }
+  // Extraction methods...
+  const bool       isTerm    ( ) const { return (apt.empty()); }
+  const Tree<B,T>& getBranch ( const B& b ) const { return (apt.find(b)!=apt.end()) ? *apt.find(b)->second : tDummy; }
+  // Specification methods...
+  Tree<B,T>& setBranch       ( const B& b )       { if (apt.find(b)==apt.end()) apt[b]=new Tree<B,T>(); return *apt[b]; }
+};
+template<class B, class T> const Tree<B,T> Tree<B,T>::tDummy;// = Tree<B,T>();

mosesdecoder/contrib/zmert-moses.pl ADDED Viewed

	@@ -0,0 +1,1121 @@

+#!/usr/bin/perl -w
+# Usage:
+# zmert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
+# For other options see below or run 'zmert-moses.pl --help'
+# Notes:
+# <foreign> and <english> should be raw text files, one sentence per line
+# <english> can be a prefix, in which case the files are <english>0, <english>1, etc. are used
+# Revision history
+# 29 Dec 2009 Derived from mert-moses-new.pl (Kamil Kos)
+use FindBin qw($RealBin);
+use File::Basename;
+my $SCRIPTS_ROOTDIR = $RealBin;
+$SCRIPTS_ROOTDIR =~ s/\/training$//;
+$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
+# for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
+# of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
+# but the translation model has currently 5 features
+# defaults for initial values and ranges are:
+my $default_triples = {
+    # these two basic models exist even if not specified, they are
+    # not associated with any model file
+    "w" => [ [ 0.0, -1.0, 1.0 ] ],  # word penalty
+};
+my $additional_triples = {
+    # if the more lambda parameters for the weights are needed
+    # (due to additional tables) use the following values for them
+    "d"  => [ [ 1.0, 0.0, 2.0 ],    # lexicalized reordering model
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ],
+	      [ 1.0, 0.0, 2.0 ] ],
+    "lm" => [ [ 1.0, 0.0, 2.0 ] ],  # language model
+    "g"  => [ [ 1.0, 0.0, 2.0 ],    # generation model
+	      [ 1.0, 0.0, 2.0 ] ],
+    "tm" => [ [ 0.3, 0.0, 0.5 ],    # translation model
+	      [ 0.2, 0.0, 0.5 ],
+	      [ 0.3, 0.0, 0.5 ],
+	      [ 0.2, 0.0, 0.5 ],
+	      [ 0.0,-1.0, 1.0 ] ],  # ... last weight is phrase penalty
+    "lex"=> [ [ 0.1, 0.0, 0.2 ] ],  # global lexical model
+};
+# moses.ini file uses FULL names for lambdas, while this training script internally (and on the command line)
+# uses ABBR names.
+my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation lex=weight-lex";
+my %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
+my %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
+# We parse moses.ini to figure out how many weights do we need to optimize.
+# For this, we must know the correspondence between options defining files
+# for models and options assigning weights to these models.
+my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d generation-file=g global-lexical-file=lex";
+my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP;
+# There are weights that do not correspond to any input file, they just increase the total number of lambdas we optimize
+#my $extra_lambdas_for_model = {
+#  "w" => 1,  # word penalty
+#  "d" => 1,  # basic distortion
+#};
+my $verbose = 0;
+my $___MERT_VERBOSE = 1; # verbosity of zmert (values: 0-2)
+my $___DECODER_VERBOSE = 1; # should decoder output be included? - 0:no,1:yes
+my $___SAVE_INTER = 2; # save intermediate nbest-lists
+my $usage = 0; # request for --help
+my $___WORKING_DIR = "mert-work";
+my $___DEV_F = undef; # required, input text to decode
+my $___DEV_E = undef; # required, basename of files with references
+my $___DECODER = undef; # required, pathname to the decoder executable
+my $___CONFIG = undef; # required, pathname to startup ini file
+my $___N_BEST_LIST_SIZE = 100;
+my $___MAX_MERT_ITER = 0; # do not limit the number of iterations
+my $queue_flags = "-l mem_free=0.5G -hard";  # extra parameters for parallelizer
+      # the -l ws0ssmt is relevant only to JHU workshop
+my $___JOBS = undef; # if parallel, number of jobs to use (undef -> serial)
+my $___DECODER_FLAGS = ""; # additional parametrs to pass to the decoder
+my $___LAMBDA = undef; # string specifying the seed weights and boundaries of all lambdas
+my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
+my $___FILTER_PHRASE_TABLE = 1; # filter phrase table
+my $___PREDICTABLE_SEEDS = 0;
+my $___METRIC = "BLEU 4 shortest"; # name of metric that will be used for minimum error training, followed by metric parameters (see zmert documentation)
+my $___SEMPOSBLEU_WEIGHTS = "1 1"; # weights of SemPOS and BLEU
+my $___LAMBDAS_OUT = undef; # file where final lambdas should be written
+my $___EXTRACT_SEMPOS = "none"; # how shall we get the SemPOS factor (only for SemPOS metric)
+      # options: 1) 'none' - moses generates SemPOS factor in required format
+      #             (<word_form>|<SemPOS>)
+      #          2) 'factors:<factor_index_list>' - extract factors from decoder output on positions from <factor_index_list>
+      #              <factor_index_list> contains indices of factors separated by comma, e.g. '0,1,4'
+      #          3) 'tmt' - moses outputs only <word_form> and we need to
+      #             generate factors like SemPOS with TectoMT (see http://ufal.mff.cuni.cz/tectomt/)
+# set 1 if using with async decoder
+my $___ASYNC = 0;
+# Use "--norm" to select normalization in mert
+my $___NORM = "none";
+# set 0 if input type is text, set 1 if input type is confusion network
+my $___INPUTTYPE = 0;
+my $mertdir = "$SCRIPTS_ROOTDIR/../zmert/";  # path to zmert directory
+my $filtercmd = undef; # path to filter-model-given-input.pl
+my $clonecmd = "$SCRIPTS_ROOTDIR/training/clone_moses_model.pl"; # executable clone_moses_model.pl
+my $qsubwrapper = undef;
+my $moses_parallel_cmd = undef;
+my $old_sge = 0; # assume sge<6.0
+my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
+                                  # if undef work on all features
+                                  # (others are fixed to the starting values)
+my %active_features; # hash with features to optimize; optimize all if empty
+use strict;
+use Getopt::Long;
+GetOptions(
+  "working-dir=s" => \$___WORKING_DIR,
+  "input=s" => \$___DEV_F,
+  "inputtype=i" => \$___INPUTTYPE,
+  "refs=s" => \$___DEV_E,
+  "decoder=s" => \$___DECODER,
+  "config=s" => \$___CONFIG,
+  "nbest:i" => \$___N_BEST_LIST_SIZE,
+  "maxiter:i" => \$___MAX_MERT_ITER,
+  "queue-flags:s" => \$queue_flags,
+  "jobs=i" => \$___JOBS,
+  "decoder-flags=s" => \$___DECODER_FLAGS,
+  "lambdas=s" => \$___LAMBDA,
+  "metric=s" => \$___METRIC,
+  "semposbleu-weights:s" => \$___SEMPOSBLEU_WEIGHTS,
+  "extract-sempos=s" => \$___EXTRACT_SEMPOS,
+  "norm:s" => \$___NORM,
+  "help" => \$usage,
+  "verbose" => \$verbose,
+  "mert-verbose:i" => \$___MERT_VERBOSE,
+  "decoder-verbose:i" => \$___DECODER_VERBOSE,
+  "mertdir:s" => \$mertdir, # allow to override the default location of zmert.jar
+  "lambdas-out:s" => \$___LAMBDAS_OUT,
+  "rootdir=s" => \$SCRIPTS_ROOTDIR,
+  "filtercmd=s" => \$filtercmd, # allow to override the default location
+  "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
+  "mosesparallelcmd=s" => \$moses_parallel_cmd, # allow to override the default location
+  "old-sge" => \$old_sge, #passed to moses-parallel
+  "filter-phrase-table!" => \$___FILTER_PHRASE_TABLE, # allow (disallow)filtering of phrase tables
+  "predictable-seeds:s" => \$___PREDICTABLE_SEEDS, # allow (disallow) switch on/off reseeding of random restarts
+  "async=i" => \$___ASYNC, #whether script to be used with async decoder
+  "activate-features=s" => \$___ACTIVATE_FEATURES #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
+) or exit(1);
+print "Predict $___PREDICTABLE_SEEDS\n";
+# the 4 required parameters can be supplied on the command line directly
+# or using the --options
+if (scalar @ARGV == 4) {
+  # required parameters: input_file references_basename decoder_executable
+  $___DEV_F = shift;
+  $___DEV_E = shift;
+  $___DECODER = shift;
+  $___CONFIG = shift;
+}
+if ($___ASYNC) {
+	delete $default_triples->{"w"};
+	$additional_triples->{"w"} = [ [ 0.0, -1.0, 1.0 ] ];
+}
+print STDERR "After default: $queue_flags\n";
+# Print the usage summary and quit when --help was given or when any of the
+# four required arguments (input, references, decoder, config) is missing.
+# Only options that GetOptions above actually accepts are listed.
+if ($usage || !defined $___DEV_F || !defined$___DEV_E || !defined$___DECODER || !defined $___CONFIG) {
+  print STDERR "usage: zmert-moses.pl input-text references decoder-executable decoder.ini
+Options:
+  --working-dir=mert-dir ... where all the files are created
+  --nbest=100 ... how big nbestlist to generate
+  --maxiter=N ... maximum number of zmert iterations
+  --jobs=N  ... set this to anything to run moses in parallel
+  --mosesparallelcmd=STRING ... use a different script instead of moses-parallel
+  --qsubwrapper=STRING ... use a different script instead of qsub-wrapper
+  --queue-flags=STRING  ... anything you wish to pass to
+              qsub, eg. '-l ws06osssmt=true'
+              The default is
+                -l mem_free=0.5G -hard
+              To reset the parameters, please use \"--queue-flags=' '\" (i.e. a space between
+              the quotes).
+  --decoder-flags=STRING ... extra parameters for the decoder
+  --lambdas=STRING  ... default values and ranges for lambdas, a complex string
+         such as 'd:1,0.5-1.5 lm:1,0.5-1.5 tm:0.3,0.25-0.75;0.2,0.25-0.75;0.2,0.25-0.75;0.3,0.25-0.75;0,-0.5-0.5 w:0,-0.5-0.5'
+  --lambdas-out=STRING ... file where final lambdas should be written
+  --metric=STRING ... metric name for optimization with metric parameters
+         such as 'BLEU 4 closest' or 'SemPOS 0 1'. Use default parameters by specifying 'BLEU' or 'SemPOS'
+  --semposbleu-weights=STRING ... weights for SemPOS and BLEU in format 'N:M' where 'N' is SemPOS weight and 'M' BLEU weight
+         used only with SemPOS_BLEU metric
+  --extract-sempos=STRING ... none|factors:<factor_list>|tmt
+         'none' ... decoder generates all required factors for optimization metric
+         'factors:<factor_list>' ... extract factors with index in <factor_list> from decoder output
+                 e.g. 'factors:0,2,3' to extract first, third and fourth factor from decoder output
+         'tmt' ... use TectoMT (see http://ufal.mff.cuni.cz/tectomt) to generate required factors
+  --norm ... Select normalization for zmert
+  --mert-verbose=N ... verbosity of zmert [0|1|2]
+  --decoder-verbose=N ... decoder verbosity [0|1] - 1=decoder output included
+  --mertdir=STRING ... directory with zmert.jar
+  --filtercmd=STRING  ... path to filter-model-given-input.pl
+  --rootdir=STRING  ... where do helpers reside (if not given explicitly)
+  --old-sge ... passed to moses-parallel, assume Sun Grid Engine < 6.0
+  --inputtype=[0|1|2] ... Handle different input types (0 for text, 1 for confusion network, 2 for lattices, default is 0)
+  --async=[0|1] ... set to 1 if used with the async decoder
+  --no-filter-phrase-table ... disallow filtering of phrase tables
+                              (useful if binary phrase tables are available)
+  --predictable-seeds ... provide predictable seeds to mert so that random restarts are the same on every run
+  --activate-features=STRING  ... comma-separated list of features to work on
+                                  (if undef work on all features)
+                                  # (others are fixed to the starting values)
+  --verbose ... verbosity of this script
+  --help ... print this help
+";
+  exit 1;
+}
+# ensure we know where is tectomt, if we need it
+# (TectoMT is needed only when --extract-sempos contains 'tmt')
+if( !defined $ENV{"TMT_ROOT"} && $___EXTRACT_SEMPOS =~ /tmt/) {
+  die "Cannot find TMT_ROOT. Is TectoMT really initialized?";
+}
+my $TMT_ROOT = $ENV{"TMT_ROOT"};
+# TMT_ROOT may be undefined when TectoMT is not requested; substitute an empty
+# string while building the path so that the script does not warn under -w.
+my $srunblocks = (defined $TMT_ROOT ? $TMT_ROOT : "")."/tools/srunblocks_streaming/srunblocks";
+my $scenario_file = "scenario";
+my $qruncmd = "/home/bojar/diplomka/bin/qruncmd";
+# command that runs the TectoMT scenario over the decoder output
+my $srunblocks_cmd = "$srunblocks --errorlevel=FATAL $scenario_file czech_source_sentence factored_output";
+if (defined $___JOBS && $___JOBS > 1) {
+  # qruncmd only wraps the TectoMT command above, so a missing qruncmd must not
+  # abort parallel runs that do not extract factors with TectoMT.
+  die "Can't run $qruncmd" if $___EXTRACT_SEMPOS =~ /tmt/ && ! -x $qruncmd;
+  $srunblocks_cmd = "$qruncmd --jobs=$___JOBS --join '$srunblocks_cmd'";
+}
+# update variables if input is confusion network
+if ($___INPUTTYPE == 1)
+{
+  $ABBR_FULL_MAP = "$ABBR_FULL_MAP I=weight-i";
+  %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
+  %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
+  push @{$default_triples -> {"I"}}, [ 1.0, 0.0, 2.0 ];
+  #$extra_lambdas_for_model -> {"I"} = 1; #Confusion network posterior
+}
+# update variables if input is lattice
+if ($___INPUTTYPE == 2)
+{
+# TODO
+}
+# Build the set of features to optimize.  The list may be separated by commas
+# and/or blanks, as the option is described where it is declared; empty
+# fields (e.g. from a leading separator) are dropped.
+if (defined $___ACTIVATE_FEATURES)
+{
+  %active_features = map {$_ => 1} grep { length } split( /[,\s]+/, $___ACTIVATE_FEATURES);
+}
+# Check validity of input parameters and set defaults if needed
+print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
+# path of script for filtering phrase tables and running the decoder
+$filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;
+$qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
+$moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
+  if !defined $moses_parallel_cmd;
+die "Error: need to specify the zmert.jar directory" if !defined $mertdir;
+my $zmert_classpath = ensure_full_path("$mertdir/zmert.jar");
+die "File not found: $mertdir/zmert.jar (interpreted as $zmert_classpath)"
+  if ! -e $zmert_classpath;
+my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
+die "Not executable: $just_cmd_filtercmd" if ! -x $just_cmd_filtercmd;
+die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
+die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
+die "Not executable: $___DECODER" if ! -x $___DECODER;
+my $input_abs = ensure_full_path($___DEV_F);
+die "File not found: $___DEV_F (interpreted as $input_abs)."
+  if ! -e $input_abs;
+$___DEV_F = $input_abs;
+# Option to pass to qsubwrapper and moses-parallel
+my $pass_old_sge = $old_sge ? "-old-sge" : "";
+my $decoder_abs = ensure_full_path($___DECODER);
+die "File not found: $___DECODER (interpreted as $decoder_abs)."
+  if ! -x $decoder_abs;
+$___DECODER = $decoder_abs;
+# Locate the reference translations.  The given name is either one existing
+# file, or a prefix of a numbered series <prefix>0, <prefix>1, ... which is
+# followed until the first missing number.
+my $ref_abs = ensure_full_path($___DEV_E);
+my @references;
+my @references_factored;
+if (! -e $ref_abs) {
+    # no file of exactly that name: treat it as a prefix
+    for (my $part = 0; -e $ref_abs.$part; $part++) {
+        push @references, $ref_abs.$part;
+    }
+    die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)") unless @references;
+}
+else {
+    push @references, $ref_abs;
+}
+my $config_abs = ensure_full_path($___CONFIG);
+die "File not found: $___CONFIG (interpreted as $config_abs)."
+  if ! -e $config_abs;
+$___CONFIG = $config_abs;
+# check validity of moses.ini and collect number of models and lambdas per model
+# need to make a copy of $extra_lambdas_for_model, scan_config spoils it
+#my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
+my %used_triples = %{$default_triples};
+my ($models_used) = scan_config($___CONFIG);
+# Turn the --lambdas string into the same structure as %used_triples
+# (name => list of [start, min, max]) and, after checking it against the
+# triples that are needed, use it in place of the defaults.
+if (defined $___LAMBDA) {
+  my %specified_triples;
+  # each blank-separated item has the form name:start,min-max[;start,min-max...]
+  for my $setting (split(/\s+/,$___LAMBDA)) {
+      my ($name,$values) = split(/:/, $setting);
+      die "Malformed setting: '$setting', expected name:values\n" unless defined $name && defined $values;
+      for my $startminmax (split(/;/,$values)) {
+          $startminmax =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/
+              or die "Malformed feature range definition: $name => $startminmax\n";
+          push @{$specified_triples{$name}}, [$1, $2, $3];
+      }
+  }
+  # every needed feature must be given, with exactly the needed number of triples
+  for my $name (keys %used_triples) {
+      die "No lambdas specified for '$name', but ".scalar(@{$used_triples{$name}})." needed.\n"
+          if !defined($specified_triples{$name});
+      my $needed = scalar(@{$used_triples{$name}});
+      my $given  = scalar(@{$specified_triples{$name}});
+      die "Number of lambdas specified for '$name' ($given) does not match number needed ($needed)\n"
+          if $needed != $given;
+  }
+  # ... and nothing may be given that is not needed
+  for my $name (keys %specified_triples) {
+      die "Lambdas specified for '$name' ".scalar(@{$specified_triples{$name}}).", but none needed.\n"
+          if !defined($used_triples{$name});
+  }
+  %used_triples = %specified_triples;
+}
+# moses should use our config: refuse decoder flags that would replace the
+# config file or any of the model files it lists.  A flag is recognized when it
+# starts the string or follows whitespace and is followed by a space.
+if ($___DECODER_FLAGS =~ /(^|\s)-(config|f|ttable-file|t|distortion-file|generation-file|lmodel-file|global-lexical-file) /) {
+  die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file, -lmodel-file or -global-lexical-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
+}
+#store current directory and create the working directory (if needed)
+my $cwd = `pawd 2>/dev/null`;
+if(!$cwd){$cwd = `pwd`;}
+chomp($cwd);
+safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";
+{
+# open local scope
+#chdir to the working directory
+chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
+# fixed file names
+my $mert_logfile = "zmert.log";
+# Prepare the moses.ini the decoder will use inside the working directory:
+# either filter the phrase tables for the input (result in ./filtered), or
+# just make a local clone of the original config.
+if ($___FILTER_PHRASE_TABLE){
+  # filter the phrase tables with respect to input, use --decoder-flags
+  print "filtering the phrase tables... ".`date`;
+  my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
+  if (defined $___JOBS) {
+    # when running in parallel, the filtering is submitted through the qsub wrapper
+    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=filterphrases.out -stderr=filterphrases.err" )
+      or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
+  } else {
+    safesystem($cmd) or die "Failed to filter the tables.";
+  }
+  # the decoder should now use the filtered model
+  $___CONFIG = "filtered/moses.ini";
+}
+else{
+  # make a local clone of moses.ini; stop here if that fails instead of
+  # continuing with a moses.ini that may not exist
+  safesystem("$clonecmd $___CONFIG") or die "Failed to clone $___CONFIG (via $clonecmd)";
+  $___CONFIG = "moses.ini";
+}
+$___CONFIG = ensure_full_path($___CONFIG);
+my $PARAMETERS;
+$PARAMETERS = $___DECODER_FLAGS;
+my $nbest_file = "zmert.best$___N_BEST_LIST_SIZE.out";
+# Run zmert to optimize lambdas
+# We need to prepare:
+#	1) decoder launch script (decoder_cmd) - must be executable
+#	2) zmert configuration file (zmert_cfg.txt)
+#	3) parameters we want to optimize (params.txt)
+#	4) decoder configuration file (decoder_cfg_inter.txt)
+my $zmert_cfg = ensure_full_path("zmert_cfg.txt");
+my $opt_params = "params.txt"; # zmert requires path relative to launch path
+my $decoder_cfg_inter = "decoder_cfg_inter.txt"; # zmert requires path relative to launch path
+my $decoder_cmd_file = ensure_full_path("decoder_cmd");
+my $iteration_file = "iteration";
+my $LAMBDAS_FILE = ensure_full_path("finalWeights.txt");
+# prepare script that will launch moses from template
+# it will include an update script that will adjust feature weights according to
+# the last zmert iteration (they are stored in file $decoder_cfg_inter)
+# prepare launch command with all parameters
+my $decoder_cmd;
+if (defined $___JOBS) {
+  $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix zmert -queue-parameters '$queue_flags' -decoder-parameters '$PARAMETERS' -n-best-list '$nbest_file $___N_BEST_LIST_SIZE' -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > moses.out";
+} else {
+  $decoder_cmd = "$___DECODER $PARAMETERS -config $___CONFIG -inputtype $___INPUTTYPE -n-best-list $nbest_file $___N_BEST_LIST_SIZE -i $___DEV_F > moses.out";
+}
+my $zmert_decoder_cmd = "$SCRIPTS_ROOTDIR/training/zmert-decoder.pl";
+# number of factors that a given metric requires
+my $metric_num_factors = 1;
+# A bare metric name (no parameters) gets that metric's default parameters
+# appended below; a metric given with parameters matches none of the branches
+# and is left untouched.
+# SemPOS metric requires 2 parameters specifying position of t_lemma and sempos factor
+# e.g. for t_lemma|sempos|factor3|factor4|... the values are 0 and 1 (default setting)
+if( $___METRIC =~ /^SemPOS$/) {
+  $___METRIC .= " 0 1";
+  $metric_num_factors = 2;
+}
+# SemPOS_BLEU metric requires 7 parameters
+# 1) weight of SemPOS 2) weight of BLEU
+# 3) index of t_lemma for SemPOS 4) index of sempos for SemPOS
+# 5) max ngram for BLEU 6) ref length strategy for BLEU
+# 7) index of factor to compute BLEU on
+elsif( $___METRIC =~ /^SemPOS_BLEU$/) {
+  # Accept the documented '<sempos_weight>:<bleu_weight>' form as well as two
+  # blank-separated weights, which is the form of the built-in default "1 1";
+  # requiring a colon unconditionally made the default itself fail this check.
+  $___SEMPOSBLEU_WEIGHTS =~ /^.*:.*$/ || $___SEMPOSBLEU_WEIGHTS =~ /^\S+\s+\S+$/
+    or die "--semposbleu-weights is not in format <sempos_weight>:<bleu_weight>";
+  $___SEMPOSBLEU_WEIGHTS =~ s/:/ /;
+  $___METRIC .= " $___SEMPOSBLEU_WEIGHTS 1 2 4 closest 0";
+  $metric_num_factors = 3;
+}
+elsif( $___METRIC =~ /^BLEU$/) {
+  $___METRIC .= " 4 closest";
+}
+elsif( $___METRIC =~ /^TER$/) {
+  $___METRIC .= " nocase punc 20 50";
+}
+elsif( $___METRIC =~ /^TER-BLEU$/) {
+  $___METRIC .= " nocase punc 20 50 4 closest";
+}
+if( $___EXTRACT_SEMPOS =~ /tmt/) {
+  my $print_string = "";
+  if( $___METRIC =~ /SemPOS_BLEU/) {
+    $print_string = "Print::ForSemPOSBLEUMetric TMT_PARAM_PRINT_FOR_SEMPOS_BLEU_METRIC=m:form|t_lemma|gram/sempos TMT_PARAM_PRINT_FOR_SEMPOS_BLEU_METRIC_DESTINATION=factored_output";
+  } elsif( $___METRIC =~ /SemPOS/) {
+    $print_string = "Print::ForSemPOSMetric TMT_PARAM_PRINT_FOR_SEMPOS_METRIC=t_lemma|gram/sempos TMT_PARAM_PRINT_FOR_SEMPOS_METRIC_DESTINATION=factored_output";
+  } else {
+    die "Trying to get factors using tmt for unknown metric $___METRIC";
+  }
+  open( SCENARIO, ">$scenario_file") or die "Cannot open $scenario_file";
+  print SCENARIO << "FILE_EOF";
+SCzechW_to_SCzechM::Tokenize_joining_numbers
+SCzechW_to_SCzechM::TagMorce
+# SCzechM_to_SCzechN::Czech_named_ent_SVM_recognizer
+# SCzechM_to_SCzechN::Geo_ne_recognizer
+# SCzechM_to_SCzechN::Embed_instances
+SCzechM_to_SCzechA::McD_parser_local TMT_PARAM_MCD_CZ_MODEL=pdt20_train_autTag_golden_latin2_pruned_0.02.model
+# SCzechM_to_SCzechA::McD_parser_local TMT_PARAM_MCD_CZ_MODEL=pdt20_train_autTag_golden_latin2_pruned_0.10.model
+SCzechM_to_SCzechA::Fix_atree_after_McD
+SCzechM_to_SCzechA::Fix_is_member
+SCzechA_to_SCzechT::Mark_auxiliary_nodes
+SCzechA_to_SCzechT::Build_ttree
+SCzechA_to_SCzechT::Fill_is_member
+SCzechA_to_SCzechT::Rehang_unary_coord_conj
+SCzechA_to_SCzechT::Assign_coap_functors
+SCzechA_to_SCzechT::Fix_is_member
+SCzechA_to_SCzechT::Distrib_coord_aux
+SCzechA_to_SCzechT::Mark_clause_heads
+SCzechA_to_SCzechT::Mark_relclause_heads
+SCzechA_to_SCzechT::Mark_relclause_coref
+SCzechA_to_SCzechT::Fix_tlemmas
+SCzechA_to_SCzechT::Assign_nodetype
+SCzechA_to_SCzechT::Assign_grammatemes
+SCzechA_to_SCzechT::Detect_formeme
+SCzechA_to_SCzechT::Add_PersPron
+SCzechA_to_SCzechT::Mark_reflpron_coref
+SCzechA_to_SCzechT::TBLa2t_phaseFd
+$print_string
+FILE_EOF
+  close( SCENARIO);
+}
+my $feats_order = join( " ", keys %used_triples);
+open( DECODER_CMD, ">$decoder_cmd_file") or die "Cannot open $decoder_cmd_file";
+  print DECODER_CMD <<"FILE_EOF";
+#!/usr/bin/perl -w
+use strict;
+my %FULL2ABBR = map {my (\$a, \$b) = split/=/,\$_,2; (\$b, \$a);} split /\\s+/, "$ABBR_FULL_MAP";
+open( ITERATION, "<$iteration_file") or die "Cannot open $iteration_file";
+my \$iteration = <ITERATION>;
+close( ITERATION);
+chomp( \$iteration);
+my \@features_order = qw( $feats_order );
+# extract feature weights from last zmert iteration (stored in \$decoder_cfg_inter)
+print "Updating decoder config file from file $decoder_cfg_inter\n";
+my \$moses_ini = "$___CONFIG";
+open( IN, "$decoder_cfg_inter") or die "Cannot open file $decoder_cfg_inter (reading updated lambdas)";
+FILE_EOF
+print DECODER_CMD <<'FILE_EOF';
+my %lambdas = ();
+my $lastName = "";
+while( my $line = <IN>) {
+  chomp($line);
+  my ($name, $val) = split( /\s+/, $line);
+  $name =~ s/_\d+$//;      # remove index of the lambda
+  push( @{$lambdas{$name}}, $val);
+}
+close(IN);
+my $moses_ini_old = "$moses_ini";
+$moses_ini_old =~ s/^(.*)\/([^\/]+)$/$1\/run$iteration.$2/;
+$moses_ini_old = $moses_ini.".orig" if( $iteration == 0);
+safesystem("mv $moses_ini $moses_ini_old");
+# update moses.ini
+open( INI_OLD, "<$moses_ini_old") or die "Cannot open config file $moses_ini_old";
+open( INI, ">$moses_ini") or die "Cannot open config file $moses_ini";
+while( my $line = <INI_OLD>) {
+  if( $line =~ m/^\[(weight-.+)\]$/) {
+    my $name = $FULL2ABBR{$1};
+    print STDERR "Updating weight: $1, $name\n";
+    print INI "$line";
+    foreach( @{$lambdas{$name}}) {
+      print INI "$_\n";
+      print STDERR "NEW: $_\tOLD:";
+      $line = <INI_OLD>;
+      print STDERR $line;
+    }
+  } else {
+    print INI $line;
+  }
+}
+close(INI_OLD);
+close(INI);
+FILE_EOF
+print DECODER_CMD <<"FILE_EOF";
+print "Executing: $decoder_cmd";
+safesystem("$decoder_cmd") or die "Failed to execute $decoder_cmd";
+# update iteration number in intermediate config file
+++\$iteration;
+safesystem("echo \$iteration > $iteration_file");
+# modify the nbest-list to conform the zmert required format
+# <i> ||| <candidate_translation> ||| featVal_1 featVal_2 ... featVal_m
+my \$nbest_file_orig = "$nbest_file".".orig";
+safesystem( "mv $nbest_file \$nbest_file_orig");
+open( NBEST_ORIG, "<\$nbest_file_orig") or die "Cannot open original nbest-list \$nbest_file_orig";
+open( NBEST, ">$nbest_file") or die "Cannot open modified nbest-list $nbest_file";
+my \$line_num = 0;
+FILE_EOF
+if( "$___EXTRACT_SEMPOS" =~ /factors/) {
+  print DECODER_CMD <<"FILE_EOF";
+my (undef, \$args) = split( /:/, "$___EXTRACT_SEMPOS");
+my \$factor_count = $metric_num_factors;
+FILE_EOF
+print DECODER_CMD <<'FILE_EOF';
+my @indices = split( /,/, $args);
+die "Specified ".scalar @indices." factors to extract but selected metric requires $factor_count factors"
+  if( @indices != $factor_count);
+while( my $line = <NBEST_ORIG>) {
+  my @array = split( /\|\|\|/, $line);
+  # remove feature names from the feature scores string
+  $array[2] = extractScores( $array[2]);
+  my @tokens = split( /\s/, $array[1]); # split sentence into words
+  $array[1] = "";
+  foreach my $token (@tokens) {
+    next if $token eq "";
+    my @factors = split( /\|/, $token);
+    my $put_separator = 0;
+    foreach my $index (@indices) {
+      die "Cannot extract factor with index $index from '$token'" if ($index > $#factors);
+      $array[1] .= '|' if ($put_separator);	# separator between factors
+      $array[1] .= $factors[$index];
+      $put_separator = 1;
+    }
+    $array[1] .= " ";	# space between words
+  }
+  print NBEST join( '|||', @array);
+}
+FILE_EOF
+} elsif( "$___EXTRACT_SEMPOS" =~ /tmt/) {
+  print DECODER_CMD <<"FILE_EOF";
+# run TectoMT to analyze sentences
+print STDERR "Analyzing candidates using $srunblocks_cmd\n";
+my \$nbest_factored = "$nbest_file.factored";
+open( NBEST_FACTORED, "|$srunblocks_cmd > \$nbest_factored") or die "Cannot open pipe to command $srunblocks_cmd";
+FILE_EOF
+print DECODER_CMD <<'FILE_EOF';
+my $line_count = 0;
+my @out = ();
+while( my $line = <NBEST_ORIG>) {
+  my @array = split( /\|\|\|/, $line);
+  die "Nbest-list does not have required format (values separated by '|||')" if ($#array != 3);
+  # remove feature names from the feature scores string
+  $array[2] = extractScores( $array[2]);
+  push( @out, \@array); # store line with scores for output
+  # select only word forms
+  my $sentence = "";
+  foreach my $fact ( split /\s+/, $array[1]) {
+    next if( $fact eq "");
+    my @fact_array = split( /\|/, $fact);
+    $sentence .= "$fact_array[0] ";
+  }
+  # analyze sentence via TectoMT using scenario
+  print NBEST_FACTORED "$sentence\n";
+  ++$line_count;
+}
+close( NBEST_ORIG);
+close( NBEST_FACTORED);
+open( NBEST_FACTORED, "<$nbest_factored") or die "Cannot open $nbest_factored";
+my $line_count_check = 0;
+while( my $line = <NBEST_FACTORED>) {
+  chomp( $line);
+  my $array_ref = shift( @out);
+  $array_ref->[1] = $line;
+  print NBEST join( '|||', @{$array_ref});
+  ++$line_count_check;
+}
+die "Error: Sent $line_count sentences to analyze but got only $line_count_check back"
+  if( $line_count != $line_count_check);
+FILE_EOF
+} elsif ($___EXTRACT_SEMPOS eq "none") {
+print DECODER_CMD <<'FILE_EOF';
+while( my $line = <NBEST_ORIG>) {
+  my @array = split( /\|\|\|/, $line);
+  # remove feature names from the feature scores string
+  $array[2] = extractScores( $array[2]);
+  print NBEST join( '|||', @array);
+}
+FILE_EOF
+} else {
+  die "Unknown type of factor extraction: $___EXTRACT_SEMPOS";
+}
+print DECODER_CMD <<'FILE_EOF';
+close( NBEST);
+close( NBEST_ORIG);
+# END OF BODY
+sub extractScores {
+  my $scores = shift;
+  my (%scores_hash, $name);
+  foreach my $score_or_name (split /\s+/, $scores) {
+    if( $score_or_name =~ s/://) {
+      $name = $score_or_name;
+    } elsif ($score_or_name =~ /\d/) {
+      die "Cannot guess nbest-list first feature score name" if( not defined $name);
+      $scores_hash{$name} .= "$score_or_name ";
+    } else {
+      die "Unknown string ($score_or_name) in nbest-list feature scores section (not a feature name or score)"
+        if( $score_or_name =~ /\S/);
+    }
+  }
+  $scores = "";
+  foreach $name (@features_order) {
+    $scores .= $scores_hash{$name};
+  }
+  #print STDERR "REORDERED SCORES: $scores\n";
+  return $scores;
+}
+sub safesystem {
+  print STDERR "Executing: @_\n";
+  system(@_);
+  if ($? == -1) {
+      print STDERR "Failed to execute: @_\n  $!\n";
+      exit(1);
+  }
+  elsif ($? & 127) {
+      printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
+          ($? & 127),  ($? & 128) ? 'with' : 'without';
+      exit(1);
+  }
+  else {
+    my $exitcode = $? >> 8;
+    print STDERR "Exit code: $exitcode\n" if $exitcode;
+    return ! $exitcode;
+  }
+}
+FILE_EOF
+close( DECODER_CMD);
+# make the decoder launch script executable
+safesystem("chmod a+x $decoder_cmd_file");
+# analyze reference if necessary
+if( $___EXTRACT_SEMPOS =~ /tmt/) {
+  # Pipe every reference file through $srunblocks_cmd and remember the name of
+  # each analyzed copy ("<ref>.factored.<part>") in @references_factored.
+  my $part = 0;
+  foreach my $ref (@references) {
+    my $line_count = 0;
+    print STDERR "Analyzing references using $srunblocks_cmd\n";
+    open( REF_IN, "<$ref") or die "Cannot open $ref";
+    my $ref_factored = "$ref.factored.$part";
+    push( @references_factored, $ref_factored);
+    # fail right away if the command cannot be started instead of reporting a
+    # misleading line-count mismatch later on
+    open( REF_FACTORED, "|$srunblocks_cmd > $ref_factored")
+      or die "Cannot open pipe to command $srunblocks_cmd";
+    while( my $line = <REF_IN>) {
+      # analyze sentence via TectoMT using the scenario file
+      print REF_FACTORED $line;
+      ++$line_count;
+    }
+    close( REF_IN);
+    # closing a pipe waits for the child process; a false result means the
+    # command reported a problem (the line-count check below still decides)
+    close( REF_FACTORED)
+      or print STDERR "Warning: command $srunblocks_cmd did not finish cleanly (status $?)\n";
+    # the command must produce exactly one output line per reference sentence
+    my $line_count_check = 0;
+    open( REF_FACTORED, "<$ref_factored") or die "Cannot open $ref_factored";
+    ++$line_count_check while( <REF_FACTORED>);
+    die "Error: Sent $line_count sentences to analyze but got $line_count_check back"
+     if( $line_count != $line_count_check);
+    close( REF_FACTORED);
+    ++$part;
+  }
+  print STDERR "References analyzed\n";
+} else {
+  # no analysis requested: the references are used as they are
+  push( @references_factored, @references);
+}
+my $ref_stem = $references_factored[0];
+$ref_stem =~ s/\d+$// if( $#references_factored); # get the file stem if we have more than one refs
+$ref_stem =~ s/.*\/([^\/]+)$/..\/$1/;
+# prepare zmert configuration file
+open( ZMERT_CFG, ">$zmert_cfg") or die "Cannot open $zmert_cfg";
+# FILES
+# print ZMERT_CFG "-dir\t$___PATH_FROM_LAUNCHDIR\n";	# working path (relative to the lauch path)
+# print ZMERT_CFG "-r\t$___DEV_E\n";	# file(s) containing references
+print ZMERT_CFG "-r\t$ref_stem\n";	# file(s) containing references
+print ZMERT_CFG "-rps\t".scalar(@references)."\n";	# number of references per sentence
+print ZMERT_CFG "-txtNrm\t0\n";	# we use our own text normalization
+print ZMERT_CFG "-p\t$opt_params\n";	# file containig parameter names, initial values, ranges
+print ZMERT_CFG "-fin\t$___LAMBDAS_OUT\n" if(defined $___LAMBDAS_OUT);	# file where the final weight vector is written
+# MERT CONFIGURATION
+print ZMERT_CFG "-m\t$___METRIC\n";
+print ZMERT_CFG "-maxIt\t$___MAX_MERT_ITER\n" if( $___MAX_MERT_ITER);	# maximum number of MERT iterations
+# print ZMERT_CFG "-prevIt\t$PREV_MERT_ITER\n";
+# number of iteration before considering an early exit
+# print ZMERT_CFG "-minIt\t$MIN_MERT_ITER\n";
+# number of consecutive iterations that must satisfy some early stopping
+# criterion to cause an early exit
+# print ZMERT_CFG "-stopIt\t$STOP_MIN_ITER\n";
+# early exit criterion: no weight changes by more than $LAMBDA_CHANGE;
+# default value: -1 (this criterion is never investigated)
+# print ZMERT_CFG "-stopSig\t$LAMBDA_CHANGE\n";
+# save intermediate decoder config files (1) or decoder outputs (2) or both (3) or neither (0)
+print ZMERT_CFG "-save\t$___SAVE_INTER\n";
+# print ZMERT_CFG "-ipi\t$INITS_PER_ITER\n";	# number of intermediate initial points per iteration
+# print ZMERT_CFG "-opi\t$ONCE_PER_ITER\n";	# modify a parameter only once per iteration;
+# print ZMERT_CFG "-rand\t$RAND_INIT\n";		# choose initial points randomly
+print ZMERT_CFG "-seed\t$___PREDICTABLE_SEEDS\n" if($___PREDICTABLE_SEEDS);	# initialize the random number generator
+# DECODER SPECIFICATION
+print ZMERT_CFG "-cmd\t$decoder_cmd_file\n";	# name of file containing commands to run the decoder
+print ZMERT_CFG "-decOut\t$nbest_file\n";	# name of the n-best file produced by the decoder
+# print ZMERT_CFG "-decExit\t$DECODER_EXIT_CODE\n";	# value returned by decoder after successful exit
+print ZMERT_CFG "-dcfg\t$decoder_cfg_inter\n";		# name of intermediate decoder configuration file
+print ZMERT_CFG "-N\t$___N_BEST_LIST_SIZE\n";
+# OUTPUT SPECIFICATION
+print ZMERT_CFG "-v\t$___MERT_VERBOSE\n";	# zmert verbosity level (0-2)
+print ZMERT_CFG "-decV\t$___DECODER_VERBOSE\n";	# decoder output printed (1) or ignored (0)
+close( ZMERT_CFG);
+my ($name, $num, $val, $min, $max);
+# prepare file with parameters to optimize
+open( PARAMS, ">$opt_params") or die "Cannot open file $opt_params with parameters to optimize";
+my $optString;
+foreach $name (keys %used_triples) {
+  $num = 0;
+  foreach my $triple (@{$used_triples{$name}}) {
+    ($val, $min, $max) = @$triple;
+    my ($minRand, $maxRand) = ($min, $max);
+    # the file should describe features to optimize in the following format:
+    # "featureName ||| defValue optString minVal maxVal minRandVal maxRandVal"
+    #    optString can be 'Opt' or 'Fix'
+   $optString = "Opt";
+   if( defined $___ACTIVATE_FEATURES and not $active_features{$name."_$num"}) {
+     $optString = "Fix";
+   }
+   print PARAMS "$name"."_$num ||| $val $optString $min $max $minRand $maxRand\n";
+    ++$num;
+  }
+}
+print PARAMS  "normalization = $___NORM\n";
+close( PARAMS);
+# prepare intermediate config file from which moses.ini will be updated before each launch
+open( DEC_CFG, ">$decoder_cfg_inter") or die "Cannot open file $decoder_cfg_inter";
+foreach $name (keys %used_triples) {
+  $num = 0;
+  foreach my $tri (@{$used_triples{$name}}) {
+    ($val, $min, $max) = @$tri;
+    print DEC_CFG $name."_$num $val\n";
+    ++$num;
+  }
+}
+close( DEC_CFG);
+open( ITER, ">$iteration_file") or die "Cannot open file $iteration_file";
+print ITER "1";
+close( ITER);
+# launch zmert
+my $javaMaxMem = ""; # -maxMem 4000" # use at most 4000MB of memory
+my $cmd = "java -cp $zmert_classpath ZMERT $javaMaxMem $zmert_cfg";
+print "Zmert start at ".`date`;
+if ( 0 && defined $___JOBS) {
+  # NOT WORKING - this branch needs to init environment variables
+  safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -stderr=$mert_logfile -queue-parameter='$queue_flags'") or die "Failed to start zmert (via qsubwrapper $qsubwrapper)";
+} else {
+  safesystem("$cmd 2> $mert_logfile") or die "Failed to run zmert";
+}
+print "Zmert finished at ".`date`;
+# RELEVANT ONLY FOR PLAYGROUND at UFAL, CHARLES UNIVERSITY IN PRAGUE
+# copy optimized moses.ini and original run1.moses.ini to the working directory
+if( $___FILTER_PHRASE_TABLE) {
+  my ($config_opt, $config_std, $config_base) = ($___CONFIG, $___CONFIG, "$cwd/moses.abs.ini");
+  $config_std =~ s/^(.*)\/([^\/]+)$/$1\/run1.$2/;
+  mergeConfigs( $config_base, $___CONFIG);
+  mergeConfigs( $config_base, $config_std);
+}
+# chdir back to the original directory # useless, just to remind we were not there
+chdir($cwd);
+} # end of local scope
+sub mergeConfigs {
+  # Merge two config files that have the same line layout: the result is a copy
+  # of $config_base in which the header and the numeric lines of every
+  # [weight-*] section are taken from $config_weights.
+  # The result is written to the current directory under the file name
+  # (without directories) of $config_weights.
+  # Dies if a file cannot be opened or if a [weight-*] header in the base file
+  # has no counterpart on the same line of the weights file.
+  my ($config_base, $config_weights) = @_;
+  my $config_new = $config_weights;
+  $config_new =~ s/^.*\///;
+  open BASE, "<$config_base" or die "Cannot open $config_base";
+  open WEIGHTS, "<$config_weights" or die "Cannot open $config_weights";
+  open NEW, ">$config_new" or die "Cannot open $config_new";
+  while( 1) {
+    # both files are read in lockstep
+    my $b_line = <BASE>;
+    my $w_line = <WEIGHTS>;
+    last if( not defined $b_line);          # base file exhausted
+    my $weights_left = defined $w_line;     # stop after this line if the weights file ran out
+    if( $b_line =~ /^\[weight-/) {
+      if( not defined $w_line or $w_line !~ /^\[weight-/) { die "mergeConfigs: $config_base and $config_weights do not have the same format"; }
+      print NEW $w_line;
+      $b_line = <BASE>; $w_line = <WEIGHTS>;
+      # copy the weight values (lines containing a digit); EOF ends the section
+      while( defined $w_line and $w_line =~ /\d/) {
+        print NEW $w_line;
+        $b_line = <BASE>; $w_line = <WEIGHTS>;
+      }
+      # the line that follows the section is taken from the base file
+      print NEW $b_line if( defined $b_line);
+    } else {
+      print NEW $b_line;
+    }
+    last if( not $weights_left);
+  }
+  close BASE;
+  close WEIGHTS;
+  close NEW;
+}
+sub dump_triples {
+  # Walks all triples stored in the given hash of arrays (name => list of
+  # three-element array refs). The values are unpacked but nothing is printed.
+  my ($triples_by_name) = @_;
+  for my $feature (keys %{$triples_by_name}) {
+    for my $entry (@{$triples_by_name->{$feature}}) {
+      my ($start, $lower, $upper) = @{$entry};
+    }
+  }
+}
+sub safesystem {
+  # Run a command via system(), echoing it to STDERR first.
+  # Returns true if the command exited with status 0 and false otherwise.
+  # The whole script exits with status 1 when the command could not be
+  # started or was terminated by a signal.
+  print STDERR "Executing: @_\n";
+  system(@_);
+  my $status = $?;
+  if ($status == -1) {
+      # the command could not be started at all
+      print STDERR "Failed to execute: @_\n  $!\n";
+      exit(1);
+  }
+  my $signal = $status & 127;
+  if ($signal) {
+      printf STDERR "Execution of: @_\n  died with signal %d, %s coredump\n",
+          $signal,  ($status & 128) ? 'with' : 'without';
+      exit(1);
+  }
+  # regular termination: the exit code is stored in the high byte
+  my $exitcode = $status >> 8;
+  print STDERR "Exit code: $exitcode\n" if $exitcode;
+  return ! $exitcode;
+}
+# Turn a path into an absolute, normalized one.
+# A path that is already absolute is returned as is (after removing the first
+# "/nfsmnt" component); a relative path is prefixed with the current directory
+# and then cleaned of "/./", repeated slashes, "dir/.." pairs and a trailing slash.
+sub ensure_full_path {
+    my $PATH = shift;
+# strip the first occurrence of "/nfsmnt" (site-specific mount prefix, presumably)
+$PATH =~ s/\/nfsmnt//;
+    return $PATH if $PATH =~ /^\//;
+    # prefer the output of `pawd`; fall back to `pwd` when it prints nothing
+    my $dir = `pawd 2>/dev/null`;
+    if(!$dir){$dir = `pwd`;}
+    chomp($dir);
+    $PATH = $dir."/".$PATH;
+    $PATH =~ s/[\r\n]//g;       # remove any remaining line breaks
+    $PATH =~ s/\/\.\//\//g;     # "/./" -> "/"
+    $PATH =~ s/\/+/\//g;        # collapse runs of slashes
+    # resolve "/dir/../" pairs; at most 10 rounds so the loop always terminates
+    my $sanity = 0;
+    while($PATH =~ /\/\.\.\// && $sanity++<10) {
+        $PATH =~ s/\/+/\//g;
+        $PATH =~ s/\/[^\/]+\/\.\.\//\//g;
+    }
+    $PATH =~ s/\/[^\/]+\/\.\.$//;   # a trailing "/dir/.."
+    $PATH =~ s/\/+$//;              # trailing slashes
+# the prefix may have been reintroduced by the current directory
+$PATH =~ s/\/nfsmnt//;
+    return $PATH;
+}
+sub scan_config {
+  # Scan the ini file $ini:
+  #  - every model file named in a known section must be given by an absolute
+  #    path and must exist (also accepted: .gz, .binphr.idx, .binlexr.idx);
+  #  - the default [start, min, max] triples of the weights each model needs
+  #    are appended to %used_triples;
+  #  - the number of translation / generation files must match the number of
+  #    T / G steps in the [mapping] section;
+  #  - the triples of the distortion (and, if $___ASYNC == 1, word penalty)
+  #    weights are appended to %used_triples.
+  # All problems are reported to STDERR and then the script exits with
+  # status 1. Otherwise a reference to a hash mapping the short name of each
+  # section to the number of files defined for it is returned.
+  my $ini = shift;
+  my $inishortname = $ini; $inishortname =~ s/^.*\///; # for error reporting
+  # we get a pre-filled counts, because some lambdas are always needed (word penalty, for instance)
+  # as we walk though the ini file, we record how many extra lambdas do we need
+  # and finally, we report it
+  # in which field (counting from zero) is the filename to check?
+  my %where_is_filename = (
+    "ttable-file" => 4,
+    "generation-file" => 3,
+    "lmodel-file" => 3,
+    "distortion-file" => 3,
+    "global-lexical-file" => 1,
+  );
+  # by default, each line of each section means one lambda, but some sections
+  # explicitly state a custom number of lambdas
+  my %where_is_lambda_count = (
+    "ttable-file" => 3,
+    "generation-file" => 2,
+    "distortion-file" => 2,
+  );
+  open INI, $ini or die "Can't read $ini";
+  my $section = undef;  # name of the section we are reading
+  my $shortname = undef;  # the corresponding short name
+  my $nr = 0;
+  my $error = 0;
+  my %defined_files;
+  my %defined_steps;  # check the ini file for compatible mapping steps and actually defined files
+  while (<INI>) {
+    $nr++;
+    next if /^\s*#/; # skip comments
+    if (/^\[([^\]]*)\]\s*$/) {
+      $section = $1;
+      $shortname = $TABLECONFIG2ABBR{$section};
+      next;
+    }
+    if (defined $section && $section eq "mapping") {
+      # keep track of mapping steps used
+      $defined_steps{$1}++ if /^([TG])/ || /^\d+ ([TG])/;
+    }
+    if (defined $section && defined $where_is_filename{$section}) {
+      print "$section -> $where_is_filename{$section}\n";
+      # this ini section is relevant to lambdas
+      chomp;
+      my @flds = split / +/;
+      my $fn = $flds[$where_is_filename{$section}];
+      if (defined $fn && $fn !~ /^\s+$/) {
+        print "checking weight-count for $section\n";
+        # this is a filename! check it
+        if ($fn !~ /^\//) {
+          $error = 1;
+          print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
+        }
+        if (! -s $fn && ! -s "$fn.gz" && ! -s "$fn.binphr.idx" && ! -s "$fn.binlexr.idx" ) {
+          $error = 1;
+          print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
+        }
+        # remember the number of files used, to know how many lambdas do we need
+        die "No short name was defined for section $section!"
+          if ! defined $shortname;
+        # how many lambdas does this model need?
+        # either specified explicitly, or the default, i.e. one
+        my $needlambdas = defined $where_is_lambda_count{$section} ? $flds[$where_is_lambda_count{$section}] : 1;
+        print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;
+        # number of default triples known for this model; computed up front
+        # because dereferencing a missing entry inside the error message
+        # would abort the script before the message is printed
+        my $defaults = $additional_triples->{$shortname};
+        my $ndefaults = defined $defaults ? scalar(@$defaults) : 0;
+        if (!defined $___LAMBDA && (!defined $defaults || $ndefaults < $needlambdas)) {
+          print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
+            .$ndefaults." weights. Cannot use the default, you must supply lambdas by hand.\n";
+          $error = 1;
+        }
+        else {
+          # note: table may use less parameters than the maximum number
+          # of triples
+          for(my $lambda=0;$lambda<$needlambdas;$lambda++) {
+            my ($start, $min, $max)
+                = @{${$additional_triples->{$shortname}}[$lambda]};
+            push @{$used_triples{$shortname}}, [$start, $min, $max];
+          }
+        }
+        $defined_files{$shortname}++;
+      }
+    }
+  }
+  die "$inishortname: File was empty!" if !$nr;
+  close INI;
+  # the number of files must agree with the number of mapping steps
+  for my $pair (qw/T=tm=translation G=g=generation/) {
+    my ($tg, $shortname, $label) = split /=/, $pair;
+    $defined_files{$shortname} = 0 if ! defined $defined_files{$shortname};
+    $defined_steps{$tg} = 0 if ! defined $defined_steps{$tg};
+    if ($defined_files{$shortname} != $defined_steps{$tg}) {
+      print STDERR "$inishortname: You defined $defined_files{$shortname} files for $label but use $defined_steps{$tg} in [mapping]!\n";
+      $error = 1;
+    }
+  }
+  # distance-based distortion
+  if ($___ASYNC == 1)
+  {
+    # one distortion and one word penalty weight per translation step
+    print STDERR "ASYNC distortion & word penalty";
+    my @my_array;
+    for(my $i=0 ; $i < $defined_steps{"T"} ; $i++)
+    {
+      push @my_array, [ 1.0, 0.0, 2.0 ];
+    }
+    push @{$used_triples{"d"}}, @my_array;
+    @my_array = ();
+    for(my $i=0 ; $i < $defined_steps{"T"} ; $i++)
+    {
+      push @my_array, [ 0.5, -1.0, 1.0 ];
+    }
+    push @{$used_triples{"w"}}, @my_array;
+    # debug print
+    print "distortion:";
+    my $refarray=$used_triples{"d"};
+    my @vector=@$refarray;
+    foreach my $subarray (@vector) {
+      my @toto=@$subarray;
+      print @toto,"\n";
+    }
+    #exit 1;
+  }
+  else
+  {
+    print STDERR "SYNC distortion";
+    push @{$used_triples{"d"}}, [1.0, 0.0, 2.0];
+  }
+  exit(1) if $error;
+  return (\%defined_files);
+}

mosesdecoder/moses/TranslationModel/UG/TargetPhraseCollectionCache.h ADDED Viewed

	@@ -0,0 +1,47 @@

+// -*- c++ -*-
+#pragma once
+#include <time.h>
+#include "moses/TargetPhraseCollection.h"
+#include <boost/atomic.hpp>
+#include "mm/ug_typedefs.h"
+namespace Moses
+{
+  class TPCollWrapper;
+  // Cache of TPCollWrapper objects, looked up by a 64-bit key.
+  // Only the interface is declared here; the eviction policy lives in the
+  // implementation file.
+  class TPCollCache
+  {
+  public:
+    // typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
+    typedef std::map<uint64_t, SPTR<TPCollWrapper> > cache_t;
+  private:
+    // NOTE(review): stored as uint32_t although the constructor takes size_t
+    uint32_t m_capacity; // capacity of cache
+    cache_t     m_cache; // maps from ids to items
+    // presumably the two ends of a queue threaded through the wrappers'
+    // prev/next iterators -- confirm against the implementation
+    cache_t::iterator m_qfirst, m_qlast;
+    mutable boost::shared_mutex  m_lock;
+  public:
+    TPCollCache(size_t capacity=10000);
+    // Look up the wrapper for the given key and corpus revision
+    // (what happens on a miss is defined in the implementation file).
+    SPTR<TPCollWrapper>
+    get(uint64_t key, size_t revision);
+  };
+  // wrapper around TargetPhraseCollection with reference counting
+  // and additional members for caching purposes
+  class TPCollWrapper
+    : public TargetPhraseCollection
+  {
+    friend class TPCollCache;
+    friend class Mmsapt;
+  public:
+    // positions of the neighbouring entries in the cache's map
+    TPCollCache::cache_t::iterator prev, next;
+  public:
+    mutable boost::shared_mutex lock;
+    size_t   const revision; // rev. No. of the underlying corpus
+    uint64_t const      key; // phrase key
+    TPCollWrapper(uint64_t const key, size_t const rev);
+    ~TPCollWrapper();
+  };
+}

mosesdecoder/moses/TranslationModel/UG/bitext-find.cc ADDED Viewed

	@@ -0,0 +1,151 @@

+#include <boost/program_options.hpp>
+#include "mm/ug_bitext.h"
+#include <string>
+using namespace std;
+using namespace Moses;
+using namespace sapt;
+namespace po=boost::program_options;
+typedef L2R_Token<SimpleWordId> Token;
+typedef mmBitext<Token> mmbitext;
+typedef Bitext<Token>::tsa tsa;
+string bname, L1, L2, Q1, Q2;
+size_t maxhits;
+void interpret_args(int ac, char* av[]);
+// Write the tokens of sentence sid from track T to out, mapped to strings via
+// the vocabulary V and separated by single blanks (no trailing blank, no
+// line break).
+void
+write_sentence
+(Ttrack<Token> const& T, uint32_t const sid, TokenIndex const& V, ostream& out)
+{
+  Token const* cur = T.sntStart(sid);
+  Token const* const stop = T.sntEnd(sid);
+  for (; cur < stop; )
+    {
+      out << V[cur->id()];
+      ++cur;
+      if (cur < stop)
+        out << " ";
+    }
+}
+// Look up the whitespace-separated words of query as one phrase in the given
+// suffix array and mark the sentences of its occurrences in v.
+// v is resized to the number of sentences in the corpus first.
+// Returns false (leaving v unmarked) as soon as the phrase cannot be extended
+// by the next word, true otherwise.
+bool
+fill(string const& query, TSA<Token> const& tsa,
+     TokenIndex const& V, bitvector& v)
+{
+  v.resize(tsa.getCorpus()->size());
+  Bitext<Token>::iter match(&tsa);
+  istringstream words(query);
+  for (string word; words >> word; )
+    {
+      if (!match.extend(V[word]))
+        return false;
+    }
+  match.markSentences(v);
+  return true;
+}
+// Look up the query phrases Q1 (language L1) and/or Q2 (language L2) in the
+// bitext bname and print a random sample of at most maxhits matching sentence
+// pairs: a header line, both sentences and the word alignment.
+int main(int argc, char* argv[])
+{
+  interpret_args(argc, argv);
+  // nothing to search for
+  if (Q1.empty() && Q2.empty()) exit(0);
+  boost::shared_ptr<mmbitext> B(new mmbitext); string w;
+  B->open(bname, L1, L2);
+  // exit silently if a query that was given has no match
+  Bitext<Token>::iter m1(B->I1.get(), *B->V1, Q1);
+  if (Q1.size() && m1.size() == 0) exit(0);
+  Bitext<Token>::iter m2(B->I2.get(), *B->V2, Q2);
+  if (Q2.size() && m2.size() == 0) exit(0);
+  // check[sid] tells whether sentence sid may be reported: every sentence if
+  // only one query was given, else only the sentences marked by m2
+  bitvector check(B->T1->size());
+  if (Q1.size() == 0 || Q2.size() == 0) check.set();
+  else (m2.markSentences(check));
+  // iterate over the occurrences of the L1 match if there is one, else of the L2 match
+  Bitext<Token>::iter& m = m1.size() ? m1 : m2;
+  char const* x = m.lower_bound(-1);
+  char const* stop = m.upper_bound(-1);
+  uint64_t sid;
+  ushort off;
+  boost::taus88 rnd;
+  size_t N = m.approxOccurrenceCount();
+  maxhits = min(N, maxhits);
+  size_t k = 0; // selected
+  for (size_t i = 0; x < stop; ++i)
+    {
+      x = m.root->readSid(x,stop,sid);
+      x = m.root->readOffset(x,stop,off);
+      if (!check[sid]) continue;
+      // keep occurrence i with probability (maxhits-k)/(N-i), unless all N
+      // occurrences are wanted anyway
+      // NOTE(review): N - i is unsigned; if N is only an estimate (see
+      // approxOccurrenceCount) and i can exceed it, this wraps around -- confirm
+      size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
+      if (maxhits != N && r >= maxhits) continue;
+      ++k;
+      size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1;
+      std::vector<unsigned char> caln;
+      // cout << sid  << " " << B->docname(sid) << std::endl;
+      // a failure is ignored on purpose: po_fwd / po_bwd then keep their
+      // initial value of -1 unless the call changed them
+      if (!B->find_trg_phr_bounds(sid, off, off+m.size(),
+				 s1,s2,e1,e2,po_fwd,po_bwd,
+				 &caln, NULL, &m == &m2))
+	{
+	  // cout << "alignment failure" << std::endl;
+	}
+      std::cout << sid  << " " << B->sid2docname(sid)
+		<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
+		<< "\n";
+      write_sentence(*B->T1, sid, *B->V1, std::cout); std::cout << "\n";
+      write_sentence(*B->T2, sid, *B->V2, std::cout); std::cout << "\n";
+      B->write_yawat_alignment(sid,
+			       m1.size() ? &m1 : NULL,
+			       m2.size() ? &m2 : NULL, std::cout);
+      std::cout << std::endl;
+    }
+}
+// Parse the command line into the globals bname, L1, L2 (positional
+// arguments), Q1, Q2 and maxhits. With --help / -h the usage text is printed
+// and the program exits with status 0.
+void
+interpret_args(int ac, char* av[])
+{
+  // options that are listed in the help text
+  po::options_description shown("Options");
+  shown.add_options()
+    ("help,h",  "print this message")
+    ("maxhits,n", po::value<size_t>(&maxhits)->default_value(25),
+     "max. number of hits")
+    ("q1", po::value<string>(&Q1), "query in L1")
+    ("q2", po::value<string>(&Q2), "query in L2")
+    ;
+
+  // everything the parser accepts: the positional arguments plus the above
+  po::options_description accepted("Hidden Options");
+  accepted.add_options()
+    ("bname", po::value<string>(&bname), "base name of corpus")
+    ("L1", po::value<string>(&L1), "L1 tag")
+    ("L2", po::value<string>(&L2), "L2 tag")
+    ;
+  accepted.add(shown);
+
+  po::positional_options_description positions;
+  positions.add("bname",1).add("L1",1).add("L2",1);
+
+  po::variables_map parsed;
+  po::command_line_parser parser(ac,av);
+  parser.options(accepted).positional(positions);
+  po::store(parser.run(), parsed);
+  po::notify(parsed);
+
+  if (!parsed.count("help"))
+    return;
+  std::cout << "\nusage:\n\t" << av[0]
+            << " [options] [--q1=<L1string>] [--q2=<L2string>]" << std::endl;
+  std::cout << shown << std::endl;
+  exit(0);
+}

mosesdecoder/moses/TranslationModel/UG/check-coverage.cc ADDED Viewed

	@@ -0,0 +1,82 @@

+// #include "mmsapt.h"
+// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+// #include "moses/TranslationTask.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+#include "mm/ug_bitext.h"
+#include "generic/file_io/ug_stream.h"
+#include <string>
+#include <sstream>
+using namespace Moses;
+using namespace sapt;
+using namespace std;
+using namespace boost;
+typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
+typedef mmBitext<Token> bitext_t;
+// Strict weak ordering that ranks (name, count) pairs by descending count.
+struct mycmp
+{
+  typedef pair<string,uint32_t> item_t;
+  bool operator() (item_t const& a, item_t const& b) const
+  {
+    return b.second < a.second;
+  }
+};
+// Return the file name component of path (the part after the last '/', or
+// the whole path if there is none) with suffix removed from its end, if the
+// name ends in suffix.
+// Also echoes path, suffix, the directory part and the tail of path to
+// stdout (diagnostic output kept from the original implementation).
+string
+basename(string const path, string const suffix)
+{
+  size_t const slash = path.find_last_of("/");
+  // first character of the file name; a path without '/' is all file name
+  size_t const start = (slash == string::npos) ? 0 : slash + 1;
+  // where a suffix of this length would start; 0 if it cannot fit at all
+  size_t const tail
+    = suffix.size() <= path.size() ? path.size() - suffix.size() : 0;
+  cout << path << " " << suffix << endl;
+  cout << path.substr(0,slash) << " " << path.substr(tail) << endl;
+  // the suffix is only stripped if it lies completely within the file name
+  bool const strip = (suffix.size() <= path.size() - start
+                      && path.compare(tail, suffix.size(), suffix) == 0);
+  size_t const stop = strip ? tail : path.size();
+  return path.substr(start, stop - start);
+}
+// usage: <program> <bname> <L1> <L2> <input file>
+// For every line of the input file and every phrase in it that occurs in the
+// L1 side of the bitext at most 500 times (as reported by ca()), print the
+// count, the phrase and the number of occurrences per document, most
+// frequent document first.
+int main(int argc, char* argv[])
+{
+  // all four arguments are dereferenced below
+  if (argc < 5)
+    {
+      cerr << "usage: " << (argc > 0 ? argv[0] : "check-coverage")
+           << " <bname> <L1> <L2> <input file>" << endl;
+      return 1;
+    }
+  boost::shared_ptr<bitext_t> B(new bitext_t);
+  B->open(argv[1],argv[2],argv[3]);
+  string line;
+  string ifile = argv[4];
+  string docname = basename(ifile, string(".") + argv[2] + ".gz");
+  boost::iostreams::filtering_istream in;
+  ugdiss::open_input_stream(ifile,in);
+  while(getline(in,line))
+    {
+      cout << line << " [" << docname << "]" << endl;
+      vector<id_type> snt;
+      B->V1->fillIdSeq(line,snt);
+      // try every start position i and extend the match word by word
+      for (size_t i = 0; i < snt.size(); ++i)
+        {
+          bitext_t::iter m(B->I1.get());
+          for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
+            {
+              // frequent phrases are not reported (but still extended)
+              if (m.ca() > 500) continue;
+              // count the occurrences of the phrase per document
+              sapt::tsa::ArrayEntry I(m.lower_bound(-1));
+              char const* stop = m.upper_bound(-1);
+              map<string,uint32_t> cnt;
+              while (I.next != stop)
+                {
+                  m.root->readEntry(I.next,I);
+                  ++cnt[B->sid2docname(I.sid)];
+                }
+              cout << setw(8) << int(m.ca()) << " "
+                   << B->V1->toString(&snt[i],&snt[k+1]) << endl;
+              // list the documents by descending count
+              typedef pair<string,uint32_t> entry;
+              vector<entry> ranked; ranked.reserve(cnt.size());
+              BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
+              sort(ranked.begin(),ranked.end(),mycmp());
+              BOOST_FOREACH(entry const& e, ranked)
+                cout << setw(12) << " " << e.second << " " << e.first << endl;
+              cout << endl;
+            }
+        }
+    }
+}

mosesdecoder/moses/TranslationModel/UG/filter-pt.cc ADDED Viewed

	@@ -0,0 +1,669 @@

+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+// significance filtering for phrase tables as described in
+// H. Johnson, et al. (2007) Improving Translation Quality
+// by Discarding Most of the Phrasetable. EMNLP 2007.
+// Implemented by Marcin Junczys-Dowmunt
+// recommended use: -l a+e -n <ttable-limit>
+#include <cstring>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <iostream>
+#include <set>
+#include <boost/thread/tss.hpp>
+#include <boost/thread.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/program_options.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/foreach.hpp>
+#ifdef WIN32
+#include "WIN32_functions.h"
+#else
+#include <unistd.h>
+#endif
+#include "mm/ug_bitext.h"
+// constants
+const size_t MINIMUM_SIZE_TO_KEEP = 10000;     // increase this to improve memory usage,
+// reduce for speed
+const std::string SEPARATOR       = " ||| ";
+const double ALPHA_PLUS_EPS  = -1000.0;        // dummy value
+const double ALPHA_MINUS_EPS = -2000.0;        // dummy value
+// configuration params
+int pfe_filter_limit = 0;               // 0 = don't filter anything based on P(f|e)
+bool print_cooc_counts = false;         // add cooc counts to phrase table?
+bool print_neglog_significance = false; // add -log(p) to phrase table?
+double sig_filter_limit = 0;            // keep phrase pairs with -log(sig) > sig_filter_limit
+//    higher = filter-more
+bool pef_filter_only = false;           // only filter based on pef
+bool hierarchical = false;
+double p_111 = 0.0;                     // alpha
+size_t pt_lines = 0;
+size_t nremoved_sigfilter = 0;
+size_t nremoved_pfefilter = 0;
+typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
+typedef sapt::mmTtrack<Token> ttrack_t;
+typedef sapt::mmTSA<Token> tsa_t;
+typedef sapt::TokenIndex tind_t;
+int num_lines;
+boost::mutex in_mutex;
+boost::mutex out_mutex;
+boost::mutex err_mutex;
+typedef size_t TextLenType;
+typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet;
+// Thread-safe cache that maps a phrase to a shared set of sentence ids.
+// Every entry carries the clock() value of its last access; prune() uses
+// these time stamps to discard the entries that were not used for the
+// longest time once the cache holds more than s_max_cache entries
+// (s_max_cache == 0 disables pruning).
+class Cache {
+  typedef std::pair<SentIdSet, clock_t> ClockedSet;
+  typedef boost::unordered_map<std::string, ClockedSet> ClockedMap;
+  public:
+    // Return the set stored for phrase and refresh its time stamp, or a
+    // new empty set (which is not stored) if the phrase is unknown.
+    SentIdSet get(const std::string& phrase) {
+      // exclusive lock: a hit writes the time stamp of the entry, which is
+      // not allowed while other threads hold the lock in shared mode
+      boost::unique_lock<boost::shared_mutex> lock(m_mutex);
+      ClockedMap::iterator hit = m_cont.find(phrase);
+      if(hit != m_cont.end()) {
+        hit->second.second = clock();
+        return hit->second.first;
+      }
+      return SentIdSet( new SentIdSet::element_type() );
+    }
+    // Store (or replace) the set for phrase, stamped with the current time.
+    void put(const std::string& phrase, const SentIdSet set) {
+      boost::unique_lock<boost::shared_mutex> lock(m_mutex);
+      m_cont[phrase] = std::make_pair(set, clock());
+    }
+    // Set the number of entries above which prune() starts discarding.
+    static void set_max_cache(size_t max_cache) {
+      s_max_cache = max_cache;
+    }
+    // Discard the entries with the oldest time stamps if the cache has
+    // grown beyond s_max_cache entries.
+    void prune() {
+      if(s_max_cache > 0) {
+        boost::upgrade_lock<boost::shared_mutex> lock(m_mutex);
+        if(m_cont.size() > s_max_cache) {
+          // time stamp below which entries are discarded
+          std::vector<clock_t> clocks;
+          clocks.reserve(m_cont.size());
+          for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); ++it)
+            clocks.push_back(it->second.second);
+          std::sort(clocks.begin(), clocks.end());
+          clock_t out = clocks[m_cont.size() - s_max_cache];
+          boost::upgrade_to_unique_lock<boost::shared_mutex> uniq_lock(lock);
+          // erase() invalidates the erased iterator, so continue with the
+          // iterator it returns instead of incrementing the old one
+          for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); ) {
+            if(it->second.second < out)
+              it = m_cont.erase(it);
+            else
+              ++it;
+          }
+        }
+      }
+    }
+  private:
+    ClockedMap m_cont;
+    boost::shared_mutex m_mutex;
+    static size_t s_max_cache;
+};
+size_t Cache::s_max_cache = 0;
+// One corpus with its index structures and a lookup cache
+// (instances are kept in the vectors e_sas and f_sas).
+struct SA {
+  tind_t V;                       // token index (sapt::TokenIndex)
+  boost::shared_ptr<ttrack_t> T;  // token track (sapt::mmTtrack<Token>)
+  tsa_t I;                        // suffix array (sapt::mmTSA<Token>)
+  Cache cache;                    // phrase -> sentence id set
+};
+std::vector<boost::shared_ptr<SA> > e_sas;
+std::vector<boost::shared_ptr<SA> > f_sas;
+#undef min
+// Print a short description of the program to stderr.
+void usage()
+{
+  std::cerr <<
+    "\nFilter phrase table using significance testing as described\n"
+    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
+    "by Discarding Most of the Phrasetable. EMNLP 2007.\n";
+}
+// One line of a phrase table, split at the " ||| " separators, plus the
+// co-occurrence statistics that are attached to it later.
+struct PTEntry {
+  // Parse the phrase table line str; index selects which of the
+  // blank-separated scores is stored in pfe.
+  PTEntry(const std::string& str, int index);
+  std::string f_phrase;  // first field of the line
+  std::string e_phrase;  // second field of the line
+  std::string extra;     // everything after the scores field, if present
+  std::string scores;    // third field of the line
+  float pfe;             // the score selected by index
+  int cf;
+  int ce;
+  int cfe;
+  float nlog_pte;
+  // Store the statistics; note that the first argument is assigned to cfe.
+  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
+    cfe = _cef;
+    cf = _cf;
+    ce = _ce;
+    nlog_pte = nlp;
+  }
+};
+// Parses one phrase-table line of the form
+//   f_phrase SEPARATOR e_phrase SEPARATOR scores [SEPARATOR extra]
+// and stores score number `index` (0-based, space-delimited) of the scores
+// field in pfe. The co-occurrence statistics start out as zero.
+PTEntry::PTEntry(const std::string& str, int index) :
+  cf(0), ce(0), cfe(0), nlog_pte(0.0)
+{
+  size_t pos = 0;
+  std::string::size_type nextPos = str.find(SEPARATOR, pos);
+  this->f_phrase = str.substr(pos,nextPos);
+  pos = nextPos + SEPARATOR.size();
+  nextPos = str.find(SEPARATOR, pos);
+  this->e_phrase = str.substr(pos,nextPos-pos);
+  pos = nextPos + SEPARATOR.size();
+  nextPos = str.find(SEPARATOR, pos);
+  if (nextPos < str.size()) {
+    this->scores = str.substr(pos,nextPos-pos);
+    pos = nextPos + SEPARATOR.size();
+    this->extra = str.substr(pos);
+  }
+  else {
+    this->scores = str.substr(pos,str.size()-pos);
+  }
+  // Walk to the space in front of score number `index` (the index-th space).
+  std::string::size_type at = 0;
+  if (index > 0) {
+    int spaces = 0;
+    for (; at < scores.size(); ++at) {
+      if (scores[at] == ' ' && ++spaces == index) break;
+    }
+  }
+  // Step over that delimiter only if we are really standing on one; stepping
+  // unconditionally used to swallow the first character of score 0.
+  if (at < scores.size() && scores[at] == ' ') {
+    ++at;
+  }
+  // Cut the token out as a std::string: a fixed-size char buffer overflows
+  // on over-long score tokens.
+  const std::string::size_type stop = scores.find(' ', at);
+  const std::string token =
+    (stop == std::string::npos) ? scores.substr(at) : scores.substr(at, stop - at);
+  this->pfe = atof(token.c_str());
+}
+// Strict weak ordering that ranks entries with a higher pfe score first.
+struct PfeComparer {
+  bool operator()(const PTEntry* a, const PTEntry* b) const {
+    return b->pfe < a->pfe;
+  }
+};
+// Predicate for std::remove_if: selects entries whose -log significance is
+// below the threshold t. Note the side effect: a selected entry is deleted
+// right here, so the caller must only erase the removed tail, not free it.
+struct NlogSigThresholder {
+  NlogSigThresholder(float threshold) : t(threshold) {}
+  float t;
+  bool operator()(const PTEntry* a) const {
+    const bool below_threshold = a->nlog_pte < t;
+    if (below_threshold)
+      delete a;
+    return below_threshold;
+  }
+};
+// Prints an entry in phrase-table format. The extra field is appended only
+// when present; counts and -log significance only when the respective
+// print_* switch is on.
+std::ostream& operator << (std::ostream& os, const PTEntry& pp)
+{
+  const char* const sep = " ||| ";
+  os << pp.f_phrase << sep << pp.e_phrase << sep << pp.scores;
+  if (!pp.extra.empty())
+    os << sep << pp.extra;
+  if (print_cooc_counts)
+    os << sep << pp.cfe << " " << pp.cf << " " << pp.ce;
+  if (print_neglog_significance)
+    os << sep << pp.nlog_pte;
+  return os;
+}
+// Debug helper: dumps the 2x2 table (a b / c d) to stderr together with the
+// probability p and the factor b*c/((a+1)*(d+1)).
+void print(int a, int b, int c, int d, float p)
+{
+  const double xf = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
+  std::cerr << a << "\t" << b << "\t P=" << p << "\n";
+  std::cerr << c << "\t" << d << "\t xf=" << xf << "\n\n";
+}
+// 2x2 (one-sided) Fisher's exact test
+// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
+//
+// Builds the contingency table from the joint count cfe, the two marginal
+// counts and the global num_lines, then sums the probability of the observed
+// table and of every table with a larger joint count.
+double fisher_exact(int cfe, int ce, int cf)
+{
+  assert(cfe <= ce);
+  assert(cfe <= cf);
+  // cells of the 2x2 table
+  int a = cfe;
+  int b = (cf - cfe);
+  int c = (ce - cfe);
+  int d = (num_lines - ce - cf + cfe);
+  const int n = a + b + c + d;
+  // probability of the observed table, computed in log space
+  double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d)
+                  - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c)
+                  - lgamma(1+d));
+  double total_p = 0.0;
+  // Each step moves one unit from b and c to a and d; the probability of the
+  // next table follows from the current one by a simple ratio.
+  for (int steps_left = std::min(b,c); steps_left >= 0; --steps_left) {
+    total_p += cp;
+    cp *= (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
+    ++a;
+    ++d;
+    --b;
+    --c;
+  }
+  return total_p;
+}
+// Appends to *out the elements common to *set_1 and *set_2. All three
+// arguments are pointer-like handles to containers; both inputs must already
+// be sorted, as std::set_intersection requires.
+// The inputs are taken by const reference: passing the handles by value
+// copied a smart pointer (a reference-count round trip) on every call.
+template <class setType>
+void ordered_set_intersect(setType& out, const setType& set_1, const setType& set_2)
+{
+    std::set_intersection(set_1->begin(), set_1->end(),
+                          set_2->begin(), set_2->end(),
+                          std::inserter(*out, out->begin()));
+}
+// Points `ids` at the sorted, duplicate-free ids recorded for the occurrences
+// of `phrase`. A non-empty cached set is returned as is; otherwise the phrase
+// is searched in the suffix array and the result is cached when it has at
+// least MINIMUM_SIZE_TO_KEEP entries.
+void lookup_phrase(SentIdSet& ids, const std::string& phrase,
+                   tsa_t &my_sa, tind_t &my_v, Cache& cache)
+{
+    ids = cache.get(phrase);
+    if(!ids->empty())
+      return;  // served from the cache
+
+    std::vector<sapt::id_type> snt;
+    my_v.fillIdSeq(phrase, snt);
+
+    // Descend token by token; stop at the first token that cannot be matched.
+    tsa_t::tree_iterator m(&my_sa);
+    size_t k = 0;
+    for (; k < snt.size(); ++k) {
+      if (!m.extend(snt[k])) break;
+    }
+    if(k != snt.size())
+      return;  // phrase not found: ids stays empty
+
+    ids->reserve(m.approxOccurrenceCount()+10);
+    sapt::tsa::ArrayEntry I(m.lower_bound(-1));
+    char const* stop = m.upper_bound(-1);
+    do {
+      m.root->readEntry(I.next,I);
+      ids->push_back(I.sid);
+    } while (I.next != stop);
+
+    // Several occurrences may carry the same id: sort and keep each id once.
+    std::sort(ids->begin(), ids->end());
+    ids->erase(std::unique(ids->begin(), ids->end()), ids->end());
+
+    if(ids->size() >= MINIMUM_SIZE_TO_KEEP)
+      cache.put(phrase, ids);
+}
+// Fills `ids` with the ids common to the occurrence sets of all `phrases`,
+// i.e. the intersection of the individual lookup_phrase() results.
+// `rule` is not used; it is kept so that existing callers keep compiling.
+void lookup_multiple_phrases(SentIdSet& ids, std::vector<std::string> & phrases,
+                             tsa_t & my_sa, tind_t &my_v,
+                             const std::string & rule, Cache& cache)
+{
+    (void)rule;
+    // A rule without terminal sequences yields no phrases at all. There is
+    // nothing to look up then, and front() on an empty vector is undefined,
+    // so leave `ids` exactly as it was handed in.
+    if (phrases.empty()) {
+        return;
+    }
+    if (phrases.size() == 1) {
+        lookup_phrase(ids, phrases.front(), my_sa, my_v, cache);
+        return;
+    }
+    // lookup_phrase() may hand back a set that is shared with the cache, so
+    // every intersection goes into a fresh set instead of modifying an input.
+    SentIdSet current( new SentIdSet::element_type() );
+    lookup_phrase(current, phrases.front(), my_sa, my_v, cache);
+    for (std::vector<std::string>::iterator phrase=phrases.begin()+1;
+         phrase != phrases.end(); ++phrase) {
+        SentIdSet occurrences( new SentIdSet::element_type() );
+        lookup_phrase(occurrences, *phrase, my_sa, my_v, cache);
+        SentIdSet narrowed( new SentIdSet::element_type() );
+        ordered_set_intersect(narrowed, current, occurrences);
+        current = narrowed;
+    }
+    // With two or more phrases `current` is always one of the fresh sets.
+    ids->swap(*current);
+}
+// Collects in `ids` the occurrences of `rule` in the given suffix array.
+// we search for hierarchical rules by stripping away NT and looking for terminals sequences
+// if a rule contains multiple sequences of terminals, we intersect their occurrences.
+// Non-hierarchical rules are looked up as one single phrase.
+void find_occurrences(SentIdSet& ids, const std::string& rule,
+                      tsa_t& my_sa, tind_t &my_v, Cache& cache)
+{
+    if (!hierarchical) {
+        lookup_phrase(ids, rule, my_sa, my_v, cache);
+        return;
+    }
+    // All positions stay in the string's own unsigned type and npos is tested
+    // explicitly; squeezing find() results (and npos - 1) into an int relied
+    // on implementation-defined narrowing.
+    typedef std::string::size_type pos_t;
+    const pos_t npos = std::string::npos;
+    std::vector<std::string> phrases;
+    pos_t pos = 0;  // start of the terminal sequence currently being scanned
+    for (pos_t ntEnd = rule.find("] ", pos); ntEnd != npos;
+         ntEnd = rule.find("] ", pos)) {
+        const pos_t ntStart = rule.find("[", pos);
+        // ntStart == pos: NT at start of rule (or two consecutive NTs), so
+        // there are no terminals in front of it.
+        if (ntStart != npos && ntStart > pos) {
+            // -1 to cut space before NT
+            phrases.push_back(rule.substr(pos, ntStart - 1 - pos));
+        }
+        pos = ntEnd + 2;
+    }
+    // What is left ends with the LHS of the rule; keep the terminals before it.
+    const pos_t lhsStart = rule.find("[", pos);
+    if (lhsStart != npos && lhsStart > pos + 1) {
+        phrases.push_back(rule.substr(pos, lhsStart - 1 - pos));
+    }
+    lookup_multiple_phrases(ids, phrases, my_sa, my_v, rule, cache);
+}
+// input: unordered list of translation options for a single source phrase
+// Applies the optional top-N cut on pfe first, then (unless pef_filter_only is
+// set) computes the co-occurrence statistics of every remaining option and
+// drops those below sig_filter_limit. Dropped entries are deleted and erased.
+void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
+{
+  typedef std::vector<PTEntry*>::iterator OptionIter;
+
+  if (pfe_filter_limit > 0 && options.size() > pfe_filter_limit) {
+    nremoved_pfefilter += (options.size() - pfe_filter_limit);
+    const OptionIter cut = options.begin() + pfe_filter_limit;
+    std::nth_element(options.begin(), cut, options.end(), PfeComparer());
+    for (OptionIter victim = cut; victim != options.end(); ++victim)
+      delete *victim;
+    options.erase(cut, options.end());
+  }
+
+  if (pef_filter_only || options.empty())
+    return;
+
+  // Occurrences of the source phrase (the same for all options), per corpus.
+  size_t cf = 0;
+  std::vector<SentIdSet> fsets;
+  BOOST_FOREACH(boost::shared_ptr<SA>& f_sa, f_sas) {
+    SentIdSet hits( new SentIdSet::element_type() );
+    find_occurrences(hits, options.front()->f_phrase, f_sa->I, f_sa->V, f_sa->cache);
+    cf += hits->size();
+    fsets.push_back(hits);
+  }
+
+  for (size_t n = 0; n < options.size(); ++n) {
+    PTEntry& option = *options[n];
+
+    // Occurrences of this option's target phrase, per corpus.
+    size_t ce = 0;
+    std::vector<SentIdSet> esets;
+    BOOST_FOREACH(boost::shared_ptr<SA>& e_sa,  e_sas) {
+      SentIdSet hits( new SentIdSet::element_type() );
+      find_occurrences(hits, option.e_phrase, e_sa->I, e_sa->V, e_sa->cache);
+      ce += hits->size();
+      esets.push_back(hits);
+    }
+
+    // Joint count: ids found on both sides of the same corpus.
+    size_t cef = 0;
+    for(size_t j = 0; j < fsets.size(); ++j) {
+      SentIdSet both( new SentIdSet::element_type() );
+      ordered_set_intersect(both, fsets[j], esets[j]);
+      cef += both->size();
+    }
+
+    const double nlp = -log(fisher_exact(cef, cf, ce));
+    option.set_cooc_stats(cef, cf, ce, nlp);
+  }
+
+  // The predicate deletes what it rejects; only the tail is left to erase.
+  const OptionIter new_end =
+    std::remove_if(options.begin(), options.end(),
+                   NlogSigThresholder(sig_filter_limit));
+  nremoved_sigfilter += (options.end() - new_end);
+  options.erase(new_end,options.end());
+}
+// Worker loop of one filter thread: repeatedly takes a batch of phrase-table
+// lines from `in` (under in_mutex), groups consecutive lines that share the
+// same source phrase, runs compute_cooc_stats_and_filter() on each group and
+// writes the surviving entries to `out` (under out_mutex).
+// pfe_index selects which score of an entry is parsed into PTEntry::pfe.
+void filter_thread(std::istream* in, std::ostream* out, int pfe_index) {
+  const size_t kBatchSize = 500000;  // lines taken from `in` per lock acquisition
+  std::vector<std::string> lines;
+  std::string prev = "";
+  std::vector<PTEntry*> options;
+  while(true) {
+    {
+      boost::mutex::scoped_lock lock(in_mutex);
+      // Stop on end of input and also on a failed stream, which would
+      // otherwise make this loop spin forever without reading anything.
+      if(!in->good())
+        break;
+      lines.clear();
+      std::string line;
+      // Check the batch size BEFORE reading. With the operands the other way
+      // round, the line read just as the batch became full was thrown away.
+      while(lines.size() < kBatchSize && getline(*in, line))
+        lines.push_back(line);
+    }
+    std::stringstream out_temp;
+    for(std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); ++it) {
+      size_t tmp_lines = ++pt_lines;
+      if(tmp_lines % 10000 == 0) {
+        boost::mutex::scoped_lock lock(err_mutex);
+        std::cerr << ".";
+        if(tmp_lines % 500000 == 0)
+          std::cerr << "[n:" << tmp_lines << "]\n";
+        if(tmp_lines % 10000000 == 0) {
+          float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
+          float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
+          std::cerr << "------------------------------------------------------\n"
+                    << "  unfiltered phrases pairs: " << pt_lines << "\n"
+                    << "\n"
+                    << "     P(f|e) filter [first]: " << nremoved_pfefilter << "   (" << pfefper << "%)\n"
+                    << "       significance filter: " << nremoved_sigfilter << "   (" << sigfper << "%)\n"
+                    << "            TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << "   (" << (sigfper + pfefper) << "%)\n"
+                    << "\n"
+                    << "     FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << "   (" << (100.0-sigfper - pfefper) << "%)\n"
+                    << "------------------------------------------------------\n";
+        }
+      }
+      // Decide on this thread's own snapshot of the counter; the shared
+      // pt_lines may have been advanced by another thread in the meantime.
+      if(tmp_lines % 10000 == 0) {
+        BOOST_FOREACH(boost::shared_ptr<SA> f_sa, f_sas)
+          f_sa->cache.prune();
+        BOOST_FOREACH(boost::shared_ptr<SA> e_sa, e_sas)
+          e_sa->cache.prune();
+      }
+      if(it->length() > 0) {
+        PTEntry* pp = new PTEntry(*it, pfe_index);
+        if (prev != pp->f_phrase) {
+          // A new source phrase starts: filter and emit the finished group.
+          prev = pp->f_phrase;
+          if (!options.empty()) {  // always true after first line
+            compute_cooc_stats_and_filter(options);
+          }
+          for (std::vector<PTEntry*>::iterator i = options.begin();
+               i != options.end(); ++i) {
+            out_temp << **i << '\n';
+            delete *i;
+          }
+          options.clear();
+        }
+        options.push_back(pp);
+      }
+    }
+    boost::mutex::scoped_lock lock(out_mutex);
+    *out << out_temp.str() << std::flush;
+  }
+  // Flush the group that was still open when the input ran out.
+  compute_cooc_stats_and_filter(options);
+  boost::mutex::scoped_lock lock(out_mutex);
+  for (std::vector<PTEntry*>::iterator i = options.begin();
+       i != options.end(); ++i) {
+    *out << **i << '\n';
+    delete *i;
+  }
+  *out << std::flush;
+}
+namespace po = boost::program_options;
+// Entry point: parses the command line, opens the suffix arrays of both
+// corpus sides (unless only the P(f|e) filter is active), filters the phrase
+// table read from stdin to stdout with the requested number of threads and
+// finally prints the filtering statistics to stderr.
+// Invalid command lines terminate with exit status 1.
+int main(int argc, char * argv[])
+{
+  bool help;
+  std::vector<std::string> efiles;
+  std::vector<std::string> ffiles;
+  int pfe_index = 2;
+  int threads = 1;
+  size_t max_cache = 0;
+  std::string str_sig_filter_limit;
+  po::options_description general("General options");
+  general.add_options()
+    ("english,e", po::value<std::vector<std::string> >(&efiles)->multitoken(),
+     "english.suf-arr")
+    ("french,f", po::value<std::vector<std::string> >(&ffiles)->multitoken(),
+     "french.suf-arr")
+    ("pfe-index,i", po::value(&pfe_index)->default_value(2),
+     "Index of P(f|e) in phrase table")
+    ("pfe-filter-limit,n", po::value(&pfe_filter_limit)->default_value(0),
+     "0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements")
+    ("threads,t", po::value(&threads)->default_value(1),
+     "number of threads to use")
+    ("max-cache,m", po::value(&max_cache)->default_value(0),
+     "limit cache to  arg  most recent phrases")
+    ("print-cooc,c", po::value(&print_cooc_counts)->zero_tokens()->default_value(false),
+     "add the coocurrence counts to the phrase table")
+    ("print-significance,p", po::value(&print_neglog_significance)->zero_tokens()->default_value(false),
+     "add -log(significance) to the phrase table")
+    ("hierarchical,x", po::value(&hierarchical)->zero_tokens()->default_value(false),
+     "filter hierarchical rule table")
+    ("sig-filter-limit,l", po::value(&str_sig_filter_limit),
+     ">0.0, a+e, or a-e: keep values that have a -log significance > this")
+    ("help,h", po::value(&help)->zero_tokens()->default_value(false),
+     "display this message")
+  ;
+  po::options_description cmdline_options("Allowed options");
+  cmdline_options.add(general);
+  po::variables_map vm;
+  try {
+    po::store(po::command_line_parser(argc,argv).
+              options(cmdline_options).run(), vm);
+    po::notify(vm);
+  }
+  catch (const std::exception& e) {
+    std::cout << "Error: " << e.what() << std::endl << std::endl;
+    usage();
+    std::cout << cmdline_options << std::endl;
+    exit(1);  // an unparsable command line is a failure, not a success
+  }
+  if(vm["help"].as<bool>()) {
+    usage();
+    std::cout << cmdline_options << std::endl;
+    exit(0);
+  }
+  if(vm.count("pfe-filter-limit"))
+    std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
+  if(vm.count("threads"))
+    std::cerr << "Using threads: " << threads << std::endl;
+  if(vm.count("max-cache"))
+    std::cerr << "Using max phrases in caches: " << max_cache << std::endl;
+  if (strcmp(str_sig_filter_limit.c_str(),"a+e") == 0) {
+    sig_filter_limit = ALPHA_PLUS_EPS;
+  } else if (strcmp(str_sig_filter_limit.c_str(),"a-e") == 0) {
+    sig_filter_limit = ALPHA_MINUS_EPS;
+  } else {
+    char *x;
+    sig_filter_limit = strtod(str_sig_filter_limit.c_str(), &x);
+    if (sig_filter_limit < 0.0) {
+      std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n";
+      usage();
+      exit(1);  // usage() only prints; do not carry on with a bad limit
+    }
+  }
+  if (sig_filter_limit == 0.0) pef_filter_only = true;
+  //-----------------------------------------------------------------------------
+  // No check against getopt's optind here: the options are parsed by
+  // Boost.Program_options, which never advances optind, so such a test fired
+  // for every invocation that had any argument at all.
+  if ((efiles.empty() || ffiles.empty()) && !pef_filter_only) {
+    std::cerr << "Error: significance filtering needs both --english and --french suffix arrays\n";
+    usage();
+    exit(1);
+  }
+  if (!pef_filter_only) {
+    size_t elines = 0;
+    BOOST_FOREACH(std::string& efile, efiles) {
+      e_sas.push_back(boost::shared_ptr<SA>(new SA()));
+      e_sas.back()->V.open(efile + ".tdx");
+      e_sas.back()->T.reset(new ttrack_t());
+      e_sas.back()->T->open(efile + ".mct");
+      e_sas.back()->I.open(efile + ".sfa", e_sas.back()->T);
+      elines += e_sas.back()->T->size();
+    }
+    size_t flines = 0;
+    BOOST_FOREACH(std::string& ffile, ffiles) {
+      f_sas.push_back(boost::shared_ptr<SA>(new SA()));
+      f_sas.back()->V.open(ffile + ".tdx");
+      f_sas.back()->T.reset(new ttrack_t());
+      f_sas.back()->T->open(ffile + ".mct");
+      f_sas.back()->I.open(ffile + ".sfa", f_sas.back()->T);
+      flines += f_sas.back()->T->size();
+    }
+    if (elines != flines) {
+      std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
+      usage();
+      exit(1);
+    } else {
+      std::cerr << "Training corpus: " << elines << " lines\n";
+      num_lines = elines;
+    }
+    // Significance of a pair seen exactly once, with both sides seen once.
+    p_111 = -log(fisher_exact(1,1,1));
+    std::cerr << "\\alpha = " << p_111 << "\n";
+    if (sig_filter_limit == ALPHA_MINUS_EPS) {
+      sig_filter_limit = p_111 - 0.001;
+    } else if (sig_filter_limit == ALPHA_PLUS_EPS) {
+      sig_filter_limit = p_111 + 0.001;
+    }
+    std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
+  } else {
+    std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl;
+  }
+  Cache::set_max_cache(max_cache);
+  std::ios_base::sync_with_stdio(false);
+  boost::thread_group threadGroup;
+  for(int i = 0; i < threads; i++)
+    threadGroup.add_thread(new boost::thread(filter_thread, &std::cin, &std::cout, pfe_index));
+  threadGroup.join_all();
+  // With empty input there is nothing to divide by; report 0% instead of NaN.
+  const float total_lines = (float)pt_lines;
+  float pfefper = total_lines > 0 ? (100.0*(float)nremoved_pfefilter)/total_lines : 0.0;
+  float sigfper = total_lines > 0 ? (100.0*(float)nremoved_sigfilter)/total_lines : 0.0;
+  std::cerr << "\n\n------------------------------------------------------\n"
+            << "  unfiltered phrases pairs: " << pt_lines << "\n"
+            << "\n"
+            << "     P(f|e) filter [first]: " << nremoved_pfefilter << "   (" << pfefper << "%)\n"
+            << "       significance filter: " << nremoved_sigfilter << "   (" << sigfper << "%)\n"
+            << "            TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << "   (" << (sigfper + pfefper) << "%)\n"
+            << "\n"
+            << "     FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << "   (" << (100.0-sigfper - pfefper) << "%)\n"
+            << "------------------------------------------------------\n";
+}

mosesdecoder/moses/TranslationModel/UG/ptable-describe-features.cc ADDED Viewed

	@@ -0,0 +1,40 @@

+// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+#include "moses/parameters/AllOptions.h"
+using namespace Moses;
+using namespace sapt;
+using namespace std;
+using namespace boost;
+// Reads lines from stdin; for every line whose first non-blank word starts
+// with "Mmsapt" or "PhraseDictionaryBitextSampling", constructs and loads the
+// phrase table described by that line and prints its name followed by the
+// names of its features, one per line.
+int main()
+{
+  string line;
+  while(getline(cin,line))
+    {
+      size_t k = line.find_first_not_of(" ");
+      // Empty or blank-only line. Without this test k == npos compared equal
+      // to the (failed) find() results below, so a line made of spaces was
+      // handed to Mmsapt as if it were a specification.
+      if (k == string::npos) continue;
+      if (line.find("Mmsapt") != k &&
+          line.find("PhraseDictionaryBitextSampling") != k)
+        continue;
+      AllOptions::ptr opts(new AllOptions);
+      Mmsapt PT(line);
+      PT.Load(opts, false);
+      cout << PT.GetName() << ":" << endl;
+      vector<string> const& fnames = PT.GetFeatureNames();
+      BOOST_FOREACH(string const& s, fnames)
+        cout << s << endl;
+      cout << endl;
+    }
+  return 0;
+}

mosesdecoder/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h ADDED Viewed

	@@ -0,0 +1,39 @@

+// -*- c++ -*-
+// Phrase scorer that records the aggregated bias score
+//
+#include "util/exception.hh"
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+#include <cstdio>
+namespace sapt  {
+  // Single-feature scorer ("cumb"): the log of a phrase pair's cum_bias
+  // value, which is clamped from below at the floor parsed from the
+  // specification string.
+  template<typename Token>
+  class
+  PScoreCumBias : public PhraseScorer<Token>
+  {
+    float m_floor;  // lower bound applied to cum_bias before taking the log
+  public:
+    PScoreCumBias(std::string const spec)
+    {
+      this->m_floor = std::atof(spec.c_str());
+      this->m_feature_names.push_back("cumb");
+      this->m_num_feats = this->m_feature_names.size();
+      this->m_index = -1;
+    }
+
+    bool
+    isIntegerValued(int i) const { return false; }
+
+    // Writes the feature value at position m_index of *dest, or of pp.fvals
+    // when no destination vector is given.
+    void
+    operator()(Bitext<Token> const& bt,
+         PhrasePair<Token>& pp,
+         std::vector<float> * dest = NULL) const
+    {
+      std::vector<float>& target = dest ? *dest : pp.fvals;
+      target[this->m_index] = log(std::max(m_floor,pp.cum_bias));
+    }
+  };
+} // namespace sapt

mosesdecoder/moses/TranslationModel/UG/sapt_pscore_logcnt.h ADDED Viewed

	@@ -0,0 +1,62 @@

+// -*- c++ -*-
+// Phrase scorer that reports the logarithm of selected occurrence counts of
+// a phrase pair (r1, s1, g1, j, r2); the specification string selects which
+// counts are used, and each one becomes a feature named "log-<count>"
+// written by Ulrich Germann
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+namespace sapt  {
+  // Scorer that emits log(count) features for a phrase pair. The
+  // specification string selects the counts by the tags "r1", "s1", "g1",
+  // "j" and "r2"; the features appear in exactly that order.
+  template<typename Token>
+  class
+  PScoreLogCnt : public PhraseScorer<Token>
+  {
+    std::string m_specs;
+    // Which counts were requested. The specification is searched once, in the
+    // constructor, so that the scoring call does not repeat five substring
+    // searches for every phrase pair.
+    bool m_use_r1;
+    bool m_use_s1;
+    bool m_use_g1;
+    bool m_use_j;
+    bool m_use_r2;
+  public:
+    PScoreLogCnt(std::string const specs)
+      : m_specs(specs)
+      , m_use_r1(specs.find("r1") != std::string::npos) // raw source phrase counts
+      , m_use_s1(specs.find("s1") != std::string::npos) // L1 sample size
+      , m_use_g1(specs.find("g1") != std::string::npos) // coherent phrases
+      , m_use_j (specs.find("j")  != std::string::npos) // joint counts
+      , m_use_r2(specs.find("r2") != std::string::npos) // raw target phrase counts
+    {
+      this->m_index = -1;
+      if (m_use_r1) this->m_feature_names.push_back("log-r1");
+      if (m_use_s1) this->m_feature_names.push_back("log-s1");
+      if (m_use_g1) this->m_feature_names.push_back("log-g1");
+      if (m_use_j)  this->m_feature_names.push_back("log-j");
+      if (m_use_r2) this->m_feature_names.push_back("log-r2");
+      this->m_num_feats = this->m_feature_names.size();
+    }
+
+    bool
+    isIntegerValued(int i) const { return true; }
+
+    // Writes the selected log counts to consecutive slots starting at
+    // m_index, into *dest or, when dest is NULL, into pp.fvals.
+    void
+    operator()(Bitext<Token> const& bt,
+         PhrasePair<Token>& pp,
+         std::vector<float> * dest = NULL) const
+    {
+      if (!dest) dest = &pp.fvals;
+      // All counts are expected to be non-zero, selected or not.
+      assert(pp.raw1);
+      assert(pp.sample1);
+      assert(pp.good1);
+      assert(pp.joint);
+      assert(pp.raw2);
+      size_t i = this->m_index;
+      if (m_use_r1) (*dest)[i++] = log(pp.raw1);
+      if (m_use_s1) (*dest)[i++] = log(pp.sample1);
+      if (m_use_g1) (*dest)[i++] = log(pp.good1);
+      if (m_use_j)  (*dest)[i++] = log(pp.joint);
+      if (m_use_r2) (*dest)[i] = log(pp.raw2);
+    }
+  };
+} // namespace sapt

mosesdecoder/moses/TranslationModel/UG/sapt_pscore_rareness.h ADDED Viewed

	@@ -0,0 +1,38 @@

+// -*- c++ -*-
+// Phrase scorer that penalizes rarely observed phrase pairs in a bitext
+// with the asymptotic function x/(j+x), where j is the joint count and x > 0
+// is a function parameter that determines the steepness of the penalty curve
+// written by Ulrich Germann
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+namespace sapt  {
+  // rareness penalty: x/(n+x)
+  // One feature per parameter value x in m_x; n is the joint count of the
+  // phrase pair.
+  template<typename Token>
+  class
+  PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
+  {
+  public:
+    PScoreRareness(std::string const spec)
+    {
+      this->m_tag = "rare";
+      this->init(spec);
+    }
+
+    bool
+    isLogVal(int i) const { return false; }
+
+    // Writes one value per parameter to consecutive, bounds-checked slots
+    // starting at m_index, into *dest or, when dest is NULL, into pp.fvals.
+    void
+    operator()(Bitext<Token> const& bt,
+         PhrasePair<Token>& pp,
+         std::vector<float> * dest = NULL) const
+    {
+      std::vector<float>& target = dest ? *dest : pp.fvals;
+      size_t slot = this->m_index;
+      BOOST_FOREACH(float const x, this->m_x)
+        target.at(slot++) = x/(x + pp.joint);
+    }
+  };
+} // namespace sapt

mosesdecoder/moses/TranslationModel/UG/sapt_pscore_wordcount.h ADDED Viewed

	@@ -0,0 +1,33 @@

+// -*- c++ -*-
+// written by Ulrich Germann
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "sapt_pscore_base.h"
+namespace sapt
+{
+  // Single-feature scorer ("wordcount") whose value is the len2 field of the
+  // phrase pair. The constructor argument is ignored.
+  template<typename Token>
+  class
+  PScoreWC : public PhraseScorer<Token>
+  {
+  public:
+    PScoreWC(std::string const dummy)
+    {
+      this->m_feature_names.push_back(std::string("wordcount"));
+      this->m_num_feats = 1;
+      this->m_index = -1;
+    }
+
+    // Writes the value at position m_index of *dest, or of pp.fvals when no
+    // destination vector is given.
+    void
+    operator()(Bitext<Token> const& bt,
+        PhrasePair<Token>& pp,
+        std::vector<float> * dest = NULL) const
+    {
+      std::vector<float>& target = dest ? *dest : pp.fvals;
+      target[this->m_index] = pp.len2;
+    }
+  };
+}